blob: 66671cfb122290e8fbc33a7cc71ee1f44cec9679 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
Richard Barnettee519dcd2016-08-15 17:37:17 -070028from autotest_lib.server import afe_utils
beeps5e8c45a2013-12-17 22:05:11 -080029from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080030from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070031from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080032from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070033from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070034from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070035
36
# Names of the host attributes in the database that describe the servo
# connected to a DUT: the servo host, the servod port and the servo board.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'

# Port used for servod when the caller does not supply one.
DEFAULT_PORT = 9999

_CONFIG = global_config.global_config
# When true, the servod XMLRPC connection is made through the host's
# rpc_server_tracker instead of a direct http:// ServerProxy.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS',
        'enable_ssh_tunnel_for_servo',
        type=bool,
        default=False)
Simran Basi0739d682015-02-25 16:22:56 -080048
class ServoHostException(error.AutoservError):
    """Common base class of every exception raised by ServoHost."""
52
53
class ServoHostVerifyFailure(ServoHostException):
    """Signals that verification of the servo host failed."""
57
58
class ServoHostRepairFailure(ServoHostException):
    """Signals that one repair method could not repair the servo host."""
62
63
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
67
68
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempt to repair the servo host failed."""
72
73
Fang Deng5d518f42013-08-02 14:04:32 -070074class ServoHost(ssh_host.SSHHost):
75 """Host class for a host that controls a servo, e.g. beaglebone."""
76
77 # Timeout for getting the value of 'pwr_button'.
78 PWR_BUTTON_CMD_TIMEOUT_SECS = 15
79 # Timeout for rebooting servo host.
80 REBOOT_TIMEOUT_SECS = 90
81 HOST_DOWN_TIMEOUT_SECS = 60
82 # Delay after rebooting for servod to become fully functional.
83 REBOOT_DELAY_SECS = 20
84 # Servod process name.
85 SERVOD_PROCESS = 'servod'
Dan Shie5b3c512014-08-21 12:12:09 -070086 # Timeout for initializing servo signals.
87 INITIALIZE_SERVO_TIMEOUT_SECS = 30
xixuan6cf6d2f2016-01-29 15:29:00 -080088 # Ready test function
89 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070090
Fang Dengd4fe7392013-09-20 12:18:21 -070091 _MAX_POWER_CYCLE_ATTEMPTS = 3
Gabe Black1e1c41b2015-02-04 23:55:15 -080092 _timer = autotest_stats.Timer('servo_host')
Fang Dengd4fe7392013-09-20 12:18:21 -070093
Fang Deng5d518f42013-08-02 14:04:32 -070094
Richard Barnette17bfc6c2016-08-04 18:41:43 -070095 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070096 servo_port=DEFAULT_PORT, servo_board=None,
97 required_by_test=True,
Richard Barnette17bfc6c2016-08-04 18:41:43 -070098 is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070099 """Initialize a ServoHost instance.
100
101 A ServoHost instance represents a host that controls a servo.
102
103 @param servo_host: Name of the host where the servod process
104 is running.
105 @param servo_port: Port the servod process is listening on.
Dan Shi4d478522014-02-14 13:46:32 -0800106 @param required_by_test: True if servo is required by test.
107 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
108 to None, for which utils.host_is_in_lab_zone will be
109 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700110
111 """
112 super(ServoHost, self)._initialize(hostname=servo_host,
113 *args, **dargs)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700114 self.servo_port = servo_port
115 self.servo_board = servo_board
116 self.required_by_test = required_by_test
117 self._servo = None
118 self._servod_server = None
119 self._is_localhost = (self.hostname == 'localhost')
120 if self._is_localhost:
121 self._is_in_lab = False
122 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800123 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
124 else:
125 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800126
Richard Barnettee519dcd2016-08-15 17:37:17 -0700127 # Commands on the servo host must be run by the superuser.
128 # Our account on a remote host is root, but if our target is
129 # localhost then we might be running unprivileged. If so,
130 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700131 if self._is_localhost:
132 self._sudo_required = utils.system_output('id -u') != '0'
133 else:
134 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700135
Dan Shi4d478522014-02-14 13:46:32 -0800136 # Create a cache of Servo object. This must be called at the end of
137 # _initialize to make sure all attributes are set.
Dan Shi4d478522014-02-14 13:46:32 -0800138 try:
xixuan2b80c182016-03-28 11:59:30 -0700139 if ENABLE_SSH_TUNNEL_FOR_SERVO:
140 self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
141 None, servo_port, ready_test_name=self.SERVO_READY_METHOD,
142 timeout_seconds=60)
143 else:
144 remote = 'http://%s:%s' % (self.hostname, servo_port)
145 self._servod_server = xmlrpclib.ServerProxy(remote)
Dan Shi4d478522014-02-14 13:46:32 -0800146 self.verify()
Alex Millercc589692014-04-21 18:00:22 -0700147 except Exception:
Dan Shibbb0cb62014-03-24 17:50:57 -0700148 if required_by_test:
149 if not self.is_in_lab():
150 raise
151 else:
J. Richard Barnettec2d99cf2015-11-18 12:46:15 -0800152 self.repair()
Fang Deng5d518f42013-08-02 14:04:32 -0700153
154
155 def is_in_lab(self):
156 """Check whether the servo host is a lab device.
157
158 @returns: True if the servo host is in Cros Lab, otherwise False.
159
160 """
161 return self._is_in_lab
162
163
164 def is_localhost(self):
165 """Checks whether the servo host points to localhost.
166
167 @returns: True if it points to localhost, otherwise False.
168
169 """
170 return self._is_localhost
171
172
173 def get_servod_server_proxy(self):
174 """Return a proxy that can be used to communicate with servod server.
175
176 @returns: An xmlrpclib.ServerProxy that is connected to the servod
177 server on the host.
178
179 """
180 return self._servod_server
181
182
183 def get_wait_up_processes(self):
184 """Get the list of local processes to wait for in wait_up.
185
186 Override get_wait_up_processes in
187 autotest_lib.client.common_lib.hosts.base_classes.Host.
188 Wait for servod process to go up. Called by base class when
189 rebooting the device.
190
191 """
192 processes = [self.SERVOD_PROCESS]
193 return processes
194
195
beeps5e8c45a2013-12-17 22:05:11 -0800196 def _is_cros_host(self):
197 """Check if a servo host is running chromeos.
198
199 @return: True if the servo host is running chromeos.
200 False if it isn't, or we don't have enough information.
201 """
202 try:
203 result = self.run('grep -q CHROMEOS /etc/lsb-release',
204 ignore_status=True, timeout=10)
205 except (error.AutoservRunError, error.AutoservSSHTimeout):
206 return False
207 return result.exit_status == 0
208
209
Fang Deng5d518f42013-08-02 14:04:32 -0700210 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
211 connect_timeout=None, alive_interval=None):
212 """Override default make_ssh_command to use tuned options.
213
214 Tuning changes:
215 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
216 connection failure. Consistency with remote_access.py.
217
218 - ServerAliveInterval=180; which causes SSH to ping connection every
219 180 seconds. In conjunction with ServerAliveCountMax ensures
220 that if the connection dies, Autotest will bail out quickly.
221
222 - ServerAliveCountMax=3; consistency with remote_access.py.
223
224 - ConnectAttempts=4; reduce flakiness in connection errors;
225 consistency with remote_access.py.
226
227 - UserKnownHostsFile=/dev/null; we don't care about the keys.
228
229 - SSH protocol forced to 2; needed for ServerAliveInterval.
230
231 @param user User name to use for the ssh connection.
232 @param port Port on the target host to use for ssh connection.
233 @param opts Additional options to the ssh command.
234 @param hosts_file Ignored.
235 @param connect_timeout Ignored.
236 @param alive_interval Ignored.
237
238 @returns: An ssh command with the requested settings.
239
240 """
241 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
242 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
243 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
244 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
245 ' -o Protocol=2 -l %s -p %d')
246 return base_command % (opts, user, port)
247
248
249 def _make_scp_cmd(self, sources, dest):
250 """Format scp command.
251
252 Given a list of source paths and a destination path, produces the
253 appropriate scp command for encoding it. Remote paths must be
254 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
255 to allow additional ssh options.
256
257 @param sources: A list of source paths to copy from.
258 @param dest: Destination path to copy to.
259
260 @returns: An scp command that copies |sources| on local machine to
261 |dest| on the remote servo host.
262
263 """
264 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
265 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
266 return command % (self.master_ssh_option,
267 self.port, ' '.join(sources), dest)
268
269
270 def run(self, command, timeout=3600, ignore_status=False,
271 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
272 connect_timeout=30, options='', stdin=None, verbose=True, args=()):
273 """Run a command on the servo host.
274
275 Extends method `run` in SSHHost. If the servo host is a remote device,
276 it will call `run` in SSHost without changing anything.
277 If the servo host is 'localhost', it will call utils.system_output.
278
279 @param command: The command line string.
280 @param timeout: Time limit in seconds before attempting to
281 kill the running process. The run() function
282 will take a few seconds longer than 'timeout'
283 to complete if it has to kill the process.
284 @param ignore_status: Do not raise an exception, no matter
285 what the exit code of the command is.
286 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
287 @param connect_timeout: SSH connection timeout (in seconds)
288 Ignored if host is 'localhost'.
289 @param options: String with additional ssh command options
290 Ignored if host is 'localhost'.
291 @param stdin: Stdin to pass (a string) to the executed command.
292 @param verbose: Log the commands.
293 @param args: Sequence of strings to pass as arguments to command by
294 quoting them in " and escaping their contents if necessary.
295
296 @returns: A utils.CmdResult object.
297
298 @raises AutoservRunError if the command failed.
299 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
300 when servo host is not 'localhost'.
301
302 """
303 run_args = {'command': command, 'timeout': timeout,
304 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
305 'stderr_tee': stderr_tee, 'stdin': stdin,
306 'verbose': verbose, 'args': args}
307 if self.is_localhost():
308 if self._sudo_required:
309 run_args['command'] = 'sudo -n %s' % command
310 try:
311 return utils.run(**run_args)
312 except error.CmdError as e:
313 logging.error(e)
314 raise error.AutoservRunError('command execution error',
315 e.result_obj)
316 else:
317 run_args['connect_timeout'] = connect_timeout
318 run_args['options'] = options
319 return super(ServoHost, self).run(**run_args)
320
321
Dan Shi33412a82014-06-10 15:12:27 -0700322 @_timer.decorate
Fang Deng5d518f42013-08-02 14:04:32 -0700323 def _check_servod(self):
324 """A sanity check of the servod state."""
325 msg_prefix = 'Servod error: %s'
326 error_msg = None
327 try:
328 timeout, _ = retry.timeout(
329 self._servod_server.get, args=('pwr_button', ),
330 timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
331 if timeout:
332 error_msg = msg_prefix % 'Request timed out.'
333 except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
334 error_msg = msg_prefix % e
335 if error_msg:
336 raise ServoHostVerifyFailure(error_msg)
337
338
Dan Shi33412a82014-06-10 15:12:27 -0700339 def _check_servo_config(self):
340 """Check if config file exists for servod.
341
342 If servod config file does not exist, there is no need to verify if
343 servo is working. The servo could be attached to a board not supported
344 yet.
345
346 @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.
347
348 """
Kevin Chengcdece6b2016-07-27 12:55:01 -0700349 if self._is_localhost or not self._is_cros_host():
350 logging.info('We will skip servo config check, either %s '
351 'is not running chromeos or we cannot find enough '
352 'information about the host.', self.hostname)
Simran Basi0739d682015-02-25 16:22:56 -0800353 return
Kevin Chengcdece6b2016-07-27 12:55:01 -0700354
355 failure_data = []
356 servod_config_file = '/var/lib/servod/config'
Richard Barnettee519dcd2016-08-15 17:37:17 -0700357 config_files = ['%s_%s' % (servod_config_file, self.servo_port),
Kevin Chengcdece6b2016-07-27 12:55:01 -0700358 servod_config_file]
359
360 # We'll need to check for two types of config files since we're
361 # transistioning to support a new servo setup and we need to keep both
362 # to enable successful reverts.
363 # TODO(kevcheng): We can get rid of checking for servod_config_file once
364 # the fleet of beaglebones all have new style config file.
365 for config_file in config_files:
366 try:
367 self.run('test -f %s' % config_file)
Ricky Liang86b80182014-06-13 14:39:42 +0800368 return
Kevin Chengcdece6b2016-07-27 12:55:01 -0700369 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
370 failure_data.append((config_file, e))
371
372 failure_message = ('Servo config file check failed for %s: ' %
373 self.hostname)
374 for data in failure_data:
375 failure_message += '%s (%s) ' % (data[0], data[1])
376 raise ServoHostVerifyFailure(failure_message)
Dan Shi33412a82014-06-10 15:12:27 -0700377
378
Dan Shie5b3c512014-08-21 12:12:09 -0700379 def _check_servod_status(self):
380 """Check if servod process is running.
381
382 If servod is not running, there is no need to verify if servo is
383 working. Check the process before making any servod call can avoid
384 long timeout that eventually fail any servod call.
385 If the servo host is set to localhost, failure of servod status check
386 will be ignored, as servo call may use ssh tunnel.
387
388 @raises ServoHostVerifyFailure if servod process does not exist.
389
390 """
391 try:
Dan Shi18040e42014-09-03 11:14:00 -0700392 pids = [str(int(s)) for s in
393 self.run('pgrep servod').stdout.strip().split('\n')]
394 logging.info('servod is running, PID=%s', ','.join(pids))
Dan Shie5b3c512014-08-21 12:12:09 -0700395 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
396 if self._is_localhost:
397 logging.info('Ignoring servod status check failure. servo host '
398 'is set to localhost, servo call may use ssh '
399 'tunnel to go through.')
400 else:
401 raise ServoHostVerifyFailure(
402 'Servod status check failed for %s: %s' %
403 (self.hostname, e))
404
405
Dan Shi0942b1d2015-03-31 11:07:00 -0700406 def get_release_version(self):
407 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
408
409 @returns The version string in lsb-release, under attribute
410 CHROMEOS_RELEASE_VERSION.
411 """
412 lsb_release_content = self.run(
413 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
414 return lsbrelease_utils.get_chromeos_release_version(
415 lsb_release_content=lsb_release_content)
416
417
Richard Barnette3a7697f2016-04-20 11:33:27 -0700418 def _check_for_reboot(self, updater):
419 """
420 Reboot this servo host if an upgrade is waiting.
421
422 If the host has successfully downloaded and finalized a new
423 build, reboot.
424
425 @param updater: a ChromiumOSUpdater instance for checking
426 whether reboot is needed.
427 @return Return a (status, build) tuple reflecting the
428 update_engine status and current build of the host
429 at the end of the call.
430 """
431 current_build_number = self.get_release_version()
432 status = updater.check_update_status()
433 if status == autoupdater.UPDATER_NEED_REBOOT:
434 logging.info('Rebooting beaglebone host %s from build %s',
435 self.hostname, current_build_number)
436 # Tell the reboot() call not to wait for completion.
437 # Otherwise, the call will log reboot failure if servo does
438 # not come back. The logged reboot failure will lead to
439 # test job failure. If the test does not require servo, we
440 # don't want servo failure to fail the test with error:
441 # `Host did not return from reboot` in status.log.
442 reboot_cmd = 'sleep 1 ; reboot & sleep 10; reboot -f',
Richard Barnetteab9769f2016-06-01 15:01:44 -0700443 self.reboot(reboot_cmd=reboot_cmd, fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700444
445 # We told the reboot() call not to wait, but we need to wait
446 # for the reboot before we continue. Alas. The code from
447 # here below is basically a copy of Host.wait_for_restart(),
448 # with the logging bits ripped out, so that they can't cause
449 # the failure logging problem described above.
450 #
451 # The black stain that this has left on my soul can never be
452 # erased.
453 old_boot_id = self.get_boot_id()
454 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
455 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
456 old_boot_id=old_boot_id):
457 raise error.AutoservHostError(
458 'servo host %s failed to shut down.' %
459 self.hostname)
460 if self.wait_up(timeout=120):
461 current_build_number = self.get_release_version()
462 status = updater.check_update_status()
463 logging.info('servo host %s back from reboot, with build %s',
464 self.hostname, current_build_number)
465 else:
466 raise error.AutoservHostError(
467 'servo host %s failed to come back from reboot.' %
468 self.hostname)
469 return status, current_build_number
470
471
beeps5e8c45a2013-12-17 22:05:11 -0800472 @_timer.decorate
Richard Barnette3a7697f2016-04-20 11:33:27 -0700473 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800474 """Update the image on the servo host, if needed.
475
J. Richard Barnette84895392015-04-30 12:31:01 -0700476 This method recognizes the following cases:
477 * If the Host is not running Chrome OS, do nothing.
478 * If a previously triggered update is now complete, reboot
479 to the new version.
480 * If the host is processing a previously triggered update,
481 do nothing.
482 * If the host is running a version of Chrome OS different
483 from the default for servo Hosts, trigger an update, but
484 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800485
Richard Barnette3a7697f2016-04-20 11:33:27 -0700486 @param wait_for_update If an update needs to be applied and
487 this is true, then don't return until the update is
488 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800489 @raises dev_server.DevServerException: If all the devservers are down.
490 @raises site_utils.ParseBuildNameException: If the devserver returns
491 an invalid build name.
492 @raises autoupdater.ChromiumOSError: If something goes wrong in the
493 checking update engine client status or applying an update.
494 @raises AutoservRunError: If the update_engine_client isn't present on
495 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700496
beeps5e8c45a2013-12-17 22:05:11 -0800497 """
Dan Shib795b5a2015-09-24 13:26:35 -0700498 # servod could be running in a Ubuntu workstation.
beeps5e8c45a2013-12-17 22:05:11 -0800499 if not self._is_cros_host():
500 logging.info('Not attempting an update, either %s is not running '
501 'chromeos or we cannot find enough information about '
502 'the host.', self.hostname)
503 return
504
Dan Shib795b5a2015-09-24 13:26:35 -0700505 if lsbrelease_utils.is_moblab():
506 logging.info('Not attempting an update, %s is running moblab.',
507 self.hostname)
508 return
509
Richard Barnette3a7697f2016-04-20 11:33:27 -0700510 board = _CONFIG.get_config_value('CROS', 'servo_board')
J. Richard Barnette84895392015-04-30 12:31:01 -0700511 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
512 target_version = afe.run('get_stable_version', board=board)
Dan Shi3b2adf62015-09-02 17:46:54 -0700513 build_pattern = _CONFIG.get_config_value(
J. Richard Barnette84895392015-04-30 12:31:01 -0700514 'CROS', 'stable_build_pattern')
515 target_build = build_pattern % (board, target_version)
516 target_build_number = server_site_utils.ParseBuildName(
517 target_build)[3]
beeps5e8c45a2013-12-17 22:05:11 -0800518 ds = dev_server.ImageServer.resolve(self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700519 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800520
521 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700522 status, current_build_number = self._check_for_reboot(updater)
523 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800524 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
525 logging.info('servo host %s already processing an update, update '
526 'engine client status=%s', self.hostname, status)
J. Richard Barnette84895392015-04-30 12:31:01 -0700527 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800528 logging.info('Using devserver url: %s to trigger update on '
529 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700530 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800531 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700532 ds.stage_artifacts(target_build,
533 artifacts=['full_payload'])
534 except Exception as e:
535 logging.error('Staging artifacts failed: %s', str(e))
536 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800537 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700538 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700539 # TODO(jrbarnette): This 'touch' is a gross hack
540 # to get us past crbug.com/613603. Once that
541 # bug is resolved, we should remove this code.
542 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700543 updater.trigger_update()
544 except autoupdater.RootFSUpdateError as e:
545 trigger_download_status = 'failed with %s' % str(e)
546 autotest_stats.Counter(
547 'servo_host.RootFSUpdateError').increment()
548 else:
549 trigger_download_status = 'passed'
550 logging.info('Triggered download and update %s for %s, '
551 'update engine currently in status %s',
552 trigger_download_status, self.hostname,
553 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800554 else:
555 logging.info('servo host %s does not require an update.',
556 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700557 update_pending = False
558
559 if update_pending and wait_for_update:
560 logging.info('Waiting for servo update to complete.')
561 self.run('update_engine_client --follow', ignore_status=True)
562 status, current_build_number = self._check_for_reboot(updater)
563 if (status != autoupdater.UPDATER_IDLE or
564 current_build_number != target_build_number):
565 logging.error('Update failed; status: %s, '
566 'actual build: %s',
567 status, current_build_number)
568 message = ('Servo host failed to update from %s to %s' %
569 (current_build_number, target_build_number))
570 raise error.AutoservHostError(message)
beeps5e8c45a2013-12-17 22:05:11 -0800571
572
Fang Deng5d518f42013-08-02 14:04:32 -0700573 def verify_software(self):
beeps5e8c45a2013-12-17 22:05:11 -0800574 """Update the servo host and verify it's in a good state.
Fang Deng5d518f42013-08-02 14:04:32 -0700575
576 It overrides the base class function for verify_software.
beeps5e8c45a2013-12-17 22:05:11 -0800577 If an update is available, downloads and applies it. Then verifies:
Fang Deng5d518f42013-08-02 14:04:32 -0700578 1) Whether basic servo command can run successfully.
579 2) Whether USB is in a good state. crbug.com/225932
580
581 @raises ServoHostVerifyFailure if servo host does not pass the checks.
582
583 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700584 # TODO(jrbarnette) Old versions of beaglebone_servo include
585 # the powerd package. In some (not yet understood)
586 # circumstances, powerd on beaglebone will shut down after
587 # attempting to suspend. Current versions of
588 # beaglebone_servo don't have powerd, but until we can purge
589 # the lab of the old images, we need to make sure powerd
590 # isn't running.
591 self.run('stop powerd', ignore_status=True)
592
beeps5e8c45a2013-12-17 22:05:11 -0800593 logging.info('Applying an update to the servo host, if necessary.')
Richard Barnette3a7697f2016-04-20 11:33:27 -0700594 self.update_image(wait_for_update=False)
Dan Shi33412a82014-06-10 15:12:27 -0700595 self._check_servo_config()
Dan Shie5b3c512014-08-21 12:12:09 -0700596 self._check_servod_status()
597
Dan Shi4d478522014-02-14 13:46:32 -0800598 # If servo is already initialized, we don't need to do it again, call
599 # _check_servod should be enough.
600 if self._servo:
601 self._check_servod()
602 else:
603 self._servo = servo.Servo(servo_host=self)
Dan Shie5b3c512014-08-21 12:12:09 -0700604 timeout, _ = retry.timeout(
605 self._servo.initialize_dut,
606 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
607 if timeout:
608 raise ServoHostVerifyFailure('Servo initialize timed out.')
Fang Deng5d518f42013-08-02 14:04:32 -0700609 logging.info('Sanity checks pass on servo host %s', self.hostname)
610
611
612 def _repair_with_sysrq_reboot(self):
613 """Reboot with magic SysRq key."""
614 self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
615 down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
Richard Barnetteab9769f2016-06-01 15:01:44 -0700616 reboot_cmd='echo "b" > /proc/sysrq-trigger',
Fang Deng5d518f42013-08-02 14:04:32 -0700617 fastsync=True)
618 time.sleep(self.REBOOT_DELAY_SECS)
619
620
Fang Dengd4fe7392013-09-20 12:18:21 -0700621 def has_power(self):
622 """Return whether or not the servo host is powered by PoE."""
623 # TODO(fdeng): See crbug.com/302791
624 # For now, assume all servo hosts in the lab have power.
625 return self.is_in_lab()
626
627
628 def power_cycle(self):
629 """Cycle power to this host via PoE if it is a lab device.
630
631 @raises ServoHostRepairFailure if it fails to power cycle the
632 servo host.
633
634 """
635 if self.has_power():
636 try:
637 rpm_client.set_power(self.hostname, 'CYCLE')
638 except (socket.error, xmlrpclib.Error,
639 httplib.BadStatusLine,
640 rpm_client.RemotePowerException) as e:
641 raise ServoHostRepairFailure(
642 'Power cycling %s failed: %s' % (self.hostname, e))
643 else:
644 logging.info('Skipping power cycling, not a lab device.')
645
646
Fang Deng5d518f42013-08-02 14:04:32 -0700647 def _powercycle_to_repair(self):
Fang Dengd4fe7392013-09-20 12:18:21 -0700648 """Power cycle the servo host using PoE.
649
650 @raises ServoHostRepairFailure if it fails to fix the servo host.
Fang Dengf0ea6142013-10-10 21:43:16 -0700651 @raises ServoHostRepairMethodNA if it does not support power.
Fang Dengd4fe7392013-09-20 12:18:21 -0700652
653 """
654 if not self.has_power():
Fang Dengf0ea6142013-10-10 21:43:16 -0700655 raise ServoHostRepairMethodNA('%s does not support power.' %
656 self.hostname)
Fang Dengd4fe7392013-09-20 12:18:21 -0700657 logging.info('Attempting repair via PoE powercycle.')
658 failed_cycles = 0
659 self.power_cycle()
660 while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
661 failed_cycles += 1
662 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
663 raise ServoHostRepairFailure(
664 'Powercycled host %s %d times; device did not come back'
665 ' online.' % (self.hostname, failed_cycles))
666 self.power_cycle()
667 logging.info('Powercycling was successful after %d failures.',
668 failed_cycles)
669 # Allow some time for servod to get started.
670 time.sleep(self.REBOOT_DELAY_SECS)
Fang Deng5d518f42013-08-02 14:04:32 -0700671
672
def repair(self):
    """Try each known repair method on this servo host in turn.

    This overrides the base class function for repair.  A host that
    is not in the Cros Lab is never repaired: a warning is logged and
    the method returns immediately.

    A repair method only counts as successful when `verify()` passes
    after it has run; the first such success ends the procedure.
    Every attempt is recorded in a stats counter named after the
    repair method.

    @raises ServoHostRepairTotalFailure if all attempts fail.

    """
    if not self.is_in_lab():
        logging.warning('Skip repairing servo host %s: Not a lab device.',
                        self.hostname)
        return

    logging.info('Attempting to repair servo host %s.', self.hostname)
    # Drop the cached servo object so that servo initialization is
    # guaranteed to be called again later.
    self._servo = None

    failure_messages = []
    for method in (self._repair_with_sysrq_reboot,
                   self._powercycle_to_repair):
        stat_prefix = 'servo_host_repair.%s.' % method.__name__
        try:
            method()
            self.verify()
            # Kept inside the `try` on purpose: a failure to record the
            # success is treated like any other failure of this method.
            autotest_stats.Counter(stat_prefix + 'SUCCEEDED').increment()
            return
        except ServoHostRepairMethodNA as na_exc:
            logging.warning('Repair method NA: %s', na_exc)
            autotest_stats.Counter(stat_prefix + 'RepairNA').increment()
            failure_messages.append(str(na_exc))
        except Exception as exc:
            logging.warning('Failed to repair servo: %s', exc)
            autotest_stats.Counter(stat_prefix + 'FAILED').increment()
            failure_messages.append(str(exc))

    # Every repair method was tried and none left the host verifiable.
    total_failure_counter = autotest_stats.Counter(
            'servo_host_repair.Full_Repair_Failed')
    total_failure_counter.increment()
    raise ServoHostRepairTotalFailure(
            'All attempts at repairing the servo failed:\n%s' %
            '\n'.join(failure_messages))
713
714
def get_servo(self):
    """Return the servo.Servo object cached on this host.

    @return: the object currently stored in `self._servo`.
    """
    cached_servo = self._servo
    return cached_servo
721
722
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated
    component of the name; any remaining components are kept as is.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
734
735
def servo_host_is_up(servo_hostname):
    """
    Report whether a servo host answers ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False otherwise
    """
    # Strictly speaking this repeats the SSH ping performed early in the
    # servo proxy initialization code.  The difference is that this ping
    # gives up within a couple of seconds when it fails, whereas an SSH
    # ping needs 60 seconds to decide that it has timed out.  That long
    # timeout is what happens when the servo DNS name resolves but there
    # is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    runner = ping_runner.PingRunner()
    result = runner.ping(config)
    return result.received > 0
754
755
Richard Barnettee519dcd2016-08-15 17:37:17 -0700756def _map_afe_board_to_servo_board(afe_board):
757 """Map a board we get from the AFE to a servo appropriate value.
758
759 Many boards are identical to other boards for servo's purposes.
760 This function makes that mapping.
761
762 @param afe_board string board name received from AFE.
763 @return board we expect servo to have.
764
765 """
766 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
767 BOARD_MAP = {'gizmo': 'panther'}
768 mapped_board = afe_board
769 if afe_board in BOARD_MAP:
770 mapped_board = BOARD_MAP[afe_board]
771 else:
772 for suffix in KNOWN_SUFFIXES:
773 if afe_board.endswith(suffix):
774 mapped_board = afe_board[0:-len(suffix)]
775 break
776 if mapped_board != afe_board:
777 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
778 return mapped_board
779
780
def _get_standard_servo_args(dut_host):
    """
    Look up the servo settings associated with a given DUT.

    The servo host and port are read from the `_afe_host.attributes`
    field of the provided `dut_host`.  When no servo host attribute is
    present, a servo hostname derived from the DUT hostname is tried
    instead, and is used only if it is in the lab zone.  Whenever servo
    settings are found, the servo board (derived from the DUT's AFE
    board) is added to them.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of a `servo_args` dict with the servo host, an
            optional port and the board (or `None` if no servo was
            found), plus an `is_in_lab` flag indicating whether this is
            in the CrOS test lab, or some different environment.
    """
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
        is_ssp_moblab = False

    servo_args = None
    is_in_lab = False
    host_attrs = dut_host._afe_host.attributes
    if host_attrs and SERVO_HOST_ATTR in host_attrs:
        servo_hostname = host_attrs[SERVO_HOST_ATTR]
        # Inside an SSP moblab container a loopback servo address is
        # replaced with the configured `host_container_ip` value.
        if is_ssp_moblab and servo_hostname in ('localhost', '127.0.0.1'):
            servo_hostname = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_hostname}
        if SERVO_PORT_ATTR in host_attrs:
            servo_args[SERVO_PORT_ATTR] = host_attrs[SERVO_PORT_ATTR]
        # The lab-zone lookup is skipped entirely on moblab.
        if not is_moblab:
            is_in_lab = utils.host_is_in_lab_zone(servo_hostname)

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif not (is_moblab or
              dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_hostname = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_hostname)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_hostname}

    if servo_args is not None:
        afe_board = afe_utils.get_board(dut_host)
        servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                afe_board)
    return servo_args, is_in_lab
830
831
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      skip_host_up_check=False):
    """
    Build a ServoHost object for a given DUT, when that is appropriate.

    This function attempts to create a `ServoHost` object for a servo
    connected to the given `dut`.  There are three possible outcomes:
      * No servo parameters for the DUT can be determined, so no servo
        host is created.
      * Parameters can be determined and the servo host should be
        created.
      * Parameters are known, but the servo host should not be created
        anyway.

    Servo parameters consist of a host name and port number.  They come
    from one of these sources, highest priority first:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in by the caller.

    Once servo parameters are known, a servo host object is created if
    any of the following holds:
      * The `servo_args` parameter was not `None`.
      * The `skip_host_up_check` parameter is true.
      * The `try_lab_servo` parameter is true, and the specified
        servo host responds to ping.

    The servo host will be checked via `verify()` at the time of
    creation.  Failures are ignored unless the `servo_args` parameter
    was not `None`.  In that case:
      * If the servo appears to be in the test lab, an attempt will
        be made to repair it.
      * If the error isn't repaired, the exception from `verify()` will
        be passed back to the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `verify()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param skip_host_up_check  If true, do not check whether the host
                               responds to ping.

    @returns: A ServoHost object or None. See comments above.

    """
    # A caller that supplies servo_args is declaring the servo required.
    required_by_test = servo_args is not None
    is_in_lab = False
    if try_lab_servo or required_by_test:
        standard_args, is_in_lab = _get_standard_servo_args(dut)
        # Settings found for the DUT win over the caller's servo_args.
        if standard_args is not None:
            servo_args = standard_args
    if servo_args is None:
        return None

    # The ping is only attempted when nothing else forces creation.
    if (not required_by_test and not skip_host_up_check
            and not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None
    return ServoHost(required_by_test=required_by_test,
                     is_in_lab=is_in_lab, **servo_args)