blob: c74a3782525c4b0969c88e623277cf0b91bf585c [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
Kevin Cheng79589982016-10-25 13:26:04 -070015import traceback
Fang Deng5d518f42013-08-02 14:04:32 -070016import xmlrpclib
17
18from autotest_lib.client.bin import utils
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070019from autotest_lib.client.common_lib import control_data
Fang Deng5d518f42013-08-02 14:04:32 -070020from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080021from autotest_lib.client.common_lib import global_config
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070022from autotest_lib.client.common_lib import host_states
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Dan Shi0942b1d2015-03-31 11:07:00 -070024from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080025from autotest_lib.client.common_lib.cros import autoupdater
26from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070027from autotest_lib.client.common_lib.cros import retry
Kevin Cheng79589982016-10-25 13:26:04 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Christopher Wileycef1f902014-06-19 11:11:23 -070029from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000030from autotest_lib.client.cros import constants as client_constants
Richard Barnettee519dcd2016-08-15 17:37:17 -070031from autotest_lib.server import afe_utils
beeps5e8c45a2013-12-17 22:05:11 -080032from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080033from autotest_lib.server.cros import dnsname_mangler
Simran Basi0739d682015-02-25 16:22:56 -080034from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070035from autotest_lib.server.cros.dynamic_suite import control_file_getter
Richard Barnette9a26ad62016-06-10 12:03:08 -070036from autotest_lib.server.cros.servo import servo
37from autotest_lib.server.hosts import servo_repair
Fang Deng5d518f42013-08-02 14:04:32 -070038from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070039from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070040
Aviv Keshet11836322016-11-22 11:32:01 -080041from chromite.lib import metrics
Fang Deng5d518f42013-08-02 14:04:32 -070042
# Host attribute names, as stored in the database, that describe the
# servo attached to a DUT: the servo host, its servod port, the servo
# board and the servo serial.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config

# Read from the [CROS] section of the global config; off by default.
# When true, servod on a remote servo host is reached through an ssh
# tunnel (see ServoHost.get_servod_server_proxy).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory handed to the control file getter when scheduling the servo
# host reboot test.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the tests (and of the jobs created for them) that reboot a
# servo host shared by several DUTs.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
Fang Deng5d518f42013-08-02 14:04:32 -070060
class ServoHost(ssh_host.SSHHost):
    """SSH host that controls a servo, e.g. a beaglebone running servod."""

    # Port used for servod when the caller does not supply one.
    DEFAULT_PORT = 9999

    # Upper bound, in seconds, on initializing the servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Name of the servod method used as the "is it ready" test when
    # connecting through the rpc server tracker.
    SERVO_READY_METHOD = 'get_version'

    # Shell command that reboot() passes along as `reboot_cmd`.
    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
73
Fang Deng5d518f42013-08-02 14:04:32 -070074
Richard Barnette17bfc6c2016-08-04 18:41:43 -070075 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070076 servo_port=DEFAULT_PORT, servo_board=None,
Kevin Cheng643ce8a2016-09-15 15:42:12 -070077 servo_serial=None, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070078 """Initialize a ServoHost instance.
79
80 A ServoHost instance represents a host that controls a servo.
81
82 @param servo_host: Name of the host where the servod process
83 is running.
84 @param servo_port: Port the servod process is listening on.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070085 @param servo_board: Board that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -080086 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
87 to None, for which utils.host_is_in_lab_zone will be
88 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -070089
90 """
91 super(ServoHost, self)._initialize(hostname=servo_host,
92 *args, **dargs)
Richard Barnettee519dcd2016-08-15 17:37:17 -070093 self.servo_port = servo_port
94 self.servo_board = servo_board
Kevin Cheng643ce8a2016-09-15 15:42:12 -070095 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -070096 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -070097 self._repair_strategy = (
98 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -070099 self._is_localhost = (self.hostname == 'localhost')
100 if self._is_localhost:
101 self._is_in_lab = False
102 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800103 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
104 else:
105 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800106
Richard Barnettee519dcd2016-08-15 17:37:17 -0700107 # Commands on the servo host must be run by the superuser.
108 # Our account on a remote host is root, but if our target is
109 # localhost then we might be running unprivileged. If so,
110 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700111 if self._is_localhost:
112 self._sudo_required = utils.system_output('id -u') != '0'
113 else:
114 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700115
Richard Barnette9a26ad62016-06-10 12:03:08 -0700116
117 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700118 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700119
120 Initializes `self._servo` and then verifies that all network
121 connections are working. This will create an ssh tunnel if
122 it's required.
123
124 As a side effect of testing the connection, all signals on the
125 target servo are reset to default values, and the USB stick is
126 set to the neutral (off) position.
127 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700128 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700129 timeout, _ = retry.timeout(
130 servo_obj.initialize_dut,
131 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
132 if timeout:
133 raise hosts.AutoservVerifyError(
134 'Servo initialize timed out.')
135 self._servo = servo_obj
136
137
138 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700139 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700140
141 If we've previously successfully connected to our servo,
142 disconnect any established ssh tunnel, and set `self._servo`
143 back to `None`.
144 """
145 if self._servo:
146 # N.B. This call is safe even without a tunnel:
147 # rpc_server_tracker.disconnect() silently ignores
148 # unknown ports.
149 self.rpc_server_tracker.disconnect(self.servo_port)
150 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700151
152
153 def is_in_lab(self):
154 """Check whether the servo host is a lab device.
155
156 @returns: True if the servo host is in Cros Lab, otherwise False.
157
158 """
159 return self._is_in_lab
160
161
162 def is_localhost(self):
163 """Checks whether the servo host points to localhost.
164
165 @returns: True if it points to localhost, otherwise False.
166
167 """
168 return self._is_localhost
169
170
171 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700172 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700173
174 @returns: An xmlrpclib.ServerProxy that is connected to the servod
175 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700176 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700177 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
178 return self.rpc_server_tracker.xmlrpc_connect(
179 None, self.servo_port,
180 ready_test_name=self.SERVO_READY_METHOD,
181 timeout_seconds=60)
182 else:
183 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
184 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700185
186
Richard Barnette9a26ad62016-06-10 12:03:08 -0700187 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800188 """Check if a servo host is running chromeos.
189
190 @return: True if the servo host is running chromeos.
191 False if it isn't, or we don't have enough information.
192 """
193 try:
194 result = self.run('grep -q CHROMEOS /etc/lsb-release',
195 ignore_status=True, timeout=10)
196 except (error.AutoservRunError, error.AutoservSSHTimeout):
197 return False
198 return result.exit_status == 0
199
200
Fang Deng5d518f42013-08-02 14:04:32 -0700201 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
202 connect_timeout=None, alive_interval=None):
203 """Override default make_ssh_command to use tuned options.
204
205 Tuning changes:
206 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
207 connection failure. Consistency with remote_access.py.
208
209 - ServerAliveInterval=180; which causes SSH to ping connection every
210 180 seconds. In conjunction with ServerAliveCountMax ensures
211 that if the connection dies, Autotest will bail out quickly.
212
213 - ServerAliveCountMax=3; consistency with remote_access.py.
214
215 - ConnectAttempts=4; reduce flakiness in connection errors;
216 consistency with remote_access.py.
217
218 - UserKnownHostsFile=/dev/null; we don't care about the keys.
219
220 - SSH protocol forced to 2; needed for ServerAliveInterval.
221
222 @param user User name to use for the ssh connection.
223 @param port Port on the target host to use for ssh connection.
224 @param opts Additional options to the ssh command.
225 @param hosts_file Ignored.
226 @param connect_timeout Ignored.
227 @param alive_interval Ignored.
228
229 @returns: An ssh command with the requested settings.
230
231 """
232 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
233 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
234 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
235 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
236 ' -o Protocol=2 -l %s -p %d')
237 return base_command % (opts, user, port)
238
239
240 def _make_scp_cmd(self, sources, dest):
241 """Format scp command.
242
243 Given a list of source paths and a destination path, produces the
244 appropriate scp command for encoding it. Remote paths must be
245 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
246 to allow additional ssh options.
247
248 @param sources: A list of source paths to copy from.
249 @param dest: Destination path to copy to.
250
251 @returns: An scp command that copies |sources| on local machine to
252 |dest| on the remote servo host.
253
254 """
255 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
256 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
257 return command % (self.master_ssh_option,
258 self.port, ' '.join(sources), dest)
259
260
261 def run(self, command, timeout=3600, ignore_status=False,
262 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800263 connect_timeout=30, ssh_failure_retry_ok=False,
264 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700265 """Run a command on the servo host.
266
267 Extends method `run` in SSHHost. If the servo host is a remote device,
268 it will call `run` in SSHost without changing anything.
269 If the servo host is 'localhost', it will call utils.system_output.
270
271 @param command: The command line string.
272 @param timeout: Time limit in seconds before attempting to
273 kill the running process. The run() function
274 will take a few seconds longer than 'timeout'
275 to complete if it has to kill the process.
276 @param ignore_status: Do not raise an exception, no matter
277 what the exit code of the command is.
278 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
279 @param connect_timeout: SSH connection timeout (in seconds)
280 Ignored if host is 'localhost'.
281 @param options: String with additional ssh command options
282 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800283 @param ssh_failure_retry_ok: when True and ssh connection failure is
284 suspected, OK to retry command (but not
285 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700286 @param stdin: Stdin to pass (a string) to the executed command.
287 @param verbose: Log the commands.
288 @param args: Sequence of strings to pass as arguments to command by
289 quoting them in " and escaping their contents if necessary.
290
291 @returns: A utils.CmdResult object.
292
293 @raises AutoservRunError if the command failed.
294 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
295 when servo host is not 'localhost'.
296
297 """
298 run_args = {'command': command, 'timeout': timeout,
299 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
300 'stderr_tee': stderr_tee, 'stdin': stdin,
301 'verbose': verbose, 'args': args}
302 if self.is_localhost():
303 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800304 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
305 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700306 try:
307 return utils.run(**run_args)
308 except error.CmdError as e:
309 logging.error(e)
310 raise error.AutoservRunError('command execution error',
311 e.result_obj)
312 else:
313 run_args['connect_timeout'] = connect_timeout
314 run_args['options'] = options
315 return super(ServoHost, self).run(**run_args)
316
317
Richard Barnette9a26ad62016-06-10 12:03:08 -0700318 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700319 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
320
321 @returns The version string in lsb-release, under attribute
322 CHROMEOS_RELEASE_VERSION.
323 """
324 lsb_release_content = self.run(
325 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
326 return lsbrelease_utils.get_chromeos_release_version(
327 lsb_release_content=lsb_release_content)
328
329
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700330 def get_attached_duts(self, afe):
331 """Gather a list of duts that use this servo host.
332
333 @param afe: afe instance.
334
335 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700336 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700337 return afe.get_hosts_by_attribute(
338 attribute=SERVO_HOST_ATTR, value=self.hostname)
339
340
341 def get_board(self):
342 """Determine the board for this servo host.
343
344 @returns a string representing this servo host's board.
345 """
346 return lsbrelease_utils.get_current_board(
347 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
348
349
350 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
351 """Choose which dut to schedule servo host reboot job.
352
353 We'll want a semi-deterministic way of selecting which host should be
354 scheduled for the servo host reboot job. For now we'll sort the
355 list with the expectation the dut list will stay consistent.
356 From there we'll grab the first dut that is available so we
357 don't schedule a job on a dut that will never run.
358
359 @param dut_list: List of the dut hostnames to choose from.
360 @param afe: Instance of the AFE.
361
362 @return hostname of dut to schedule job on.
363 """
364 afe_hosts = afe.get_hosts(dut_list)
365 afe_hosts.sort()
366 for afe_host in afe_hosts:
367 if afe_host.status not in host_states.UNAVAILABLE_STATES:
368 return afe_host.hostname
369 # If they're all unavailable, just return the first sorted dut.
370 dut_list.sort()
371 return dut_list[0]
372
373
374 def _sync_job_scheduled_for_duts(self, dut_list, afe):
375 """Checks if a synchronized reboot has been scheduled for these duts.
376
377 Grab all the host queue entries that aren't completed for the duts and
378 see if any of them have the expected job name.
379
380 @param dut_list: List of duts to check on.
381 @param afe: Instance of the AFE.
382
383 @returns True if the job is scheduled, False otherwise.
384 """
385 afe_hosts = afe.get_hosts(dut_list)
386 for afe_host in afe_hosts:
387 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
388 for hqe in hqes:
389 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700390 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
391 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700392 return True
393 return False
394
395
Kevin Cheng55265902016-10-19 12:46:50 -0700396 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700397 """Schedule a job to reboot the servo host.
398
399 When we schedule a job, it will create a ServoHost object which will
400 go through this entire flow of checking if a reboot is needed and
401 trying to schedule it. There is probably a better approach to setting
402 up a synchronized reboot but I'm coming up short on better ideas so I
403 apologize for this circus show.
404
Kevin Cheng55265902016-10-19 12:46:50 -0700405 @param dut_list: List of duts that need to be locked.
406 @param afe: Instance of afe.
407 @param force_reboot: Boolean to indicate if a forced reboot should be
408 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700409 """
410 # If we've already scheduled job on a dut, we're done here.
411 if self._sync_job_scheduled_for_duts(dut_list, afe):
412 return
413
414 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700415 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
416 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700417 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
418 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700419 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700420 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700421 try:
422 afe.create_job(control_file=control_file, name=test,
423 control_type=control_type, hosts=[dut])
424 except Exception as e:
425 # Sometimes creating the job will raise an exception. We'll log it
426 # but we don't want to fail because of it.
427 logging.exception('Scheduling reboot job failed: %s', e)
428 metadata = {'dut': dut,
429 'servo_host': self.hostname,
430 'error': str(e),
431 'details': traceback.format_exc()}
432 # We want to track how often we fail here so we can justify
433 # investing some effort into hardening up afe.create_job().
434 autotest_es.post(use_http=True,
435 type_str='servohost_Reboot_schedule_fail',
436 metadata=metadata)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700437
438
439 def reboot(self, *args, **dargs):
440 """Reboot using special servo host reboot command."""
441 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
442 *args, **dargs)
443
444
445 def _check_for_reboot(self, updater):
446 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700447
448 If the host has successfully downloaded and finalized a new
449 build, reboot.
450
451 @param updater: a ChromiumOSUpdater instance for checking
452 whether reboot is needed.
453 @return Return a (status, build) tuple reflecting the
454 update_engine status and current build of the host
455 at the end of the call.
456 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700457 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700458 status = updater.check_update_status()
459 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700460 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700461 afe = frontend_wrappers.RetryingAFE(
462 timeout_min=5, delay_sec=10,
463 server=server_site_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700464 dut_list = self.get_attached_duts(afe)
465 logging.info('servo host has the following duts: %s', dut_list)
466 if len(dut_list) > 1:
467 logging.info('servo host has multiple duts, scheduling '
468 'synchronized reboot')
469 self.schedule_synchronized_reboot(dut_list, afe)
470 return status, current_build_number
471
472 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700473 self.hostname, current_build_number)
474 # Tell the reboot() call not to wait for completion.
475 # Otherwise, the call will log reboot failure if servo does
476 # not come back. The logged reboot failure will lead to
477 # test job failure. If the test does not require servo, we
478 # don't want servo failure to fail the test with error:
479 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700480 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700481
482 # We told the reboot() call not to wait, but we need to wait
483 # for the reboot before we continue. Alas. The code from
484 # here below is basically a copy of Host.wait_for_restart(),
485 # with the logging bits ripped out, so that they can't cause
486 # the failure logging problem described above.
487 #
488 # The black stain that this has left on my soul can never be
489 # erased.
490 old_boot_id = self.get_boot_id()
491 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
492 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
493 old_boot_id=old_boot_id):
494 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700495 'servo host %s failed to shut down.' %
496 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700497 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700498 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700499 status = updater.check_update_status()
500 logging.info('servo host %s back from reboot, with build %s',
501 self.hostname, current_build_number)
502 else:
503 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700504 'servo host %s failed to come back from reboot.' %
505 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700506 return status, current_build_number
507
508
Richard Barnette3a7697f2016-04-20 11:33:27 -0700509 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800510 """Update the image on the servo host, if needed.
511
J. Richard Barnette84895392015-04-30 12:31:01 -0700512 This method recognizes the following cases:
513 * If the Host is not running Chrome OS, do nothing.
514 * If a previously triggered update is now complete, reboot
515 to the new version.
516 * If the host is processing a previously triggered update,
517 do nothing.
518 * If the host is running a version of Chrome OS different
519 from the default for servo Hosts, trigger an update, but
520 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800521
Richard Barnette3a7697f2016-04-20 11:33:27 -0700522 @param wait_for_update If an update needs to be applied and
523 this is true, then don't return until the update is
524 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800525 @raises dev_server.DevServerException: If all the devservers are down.
526 @raises site_utils.ParseBuildNameException: If the devserver returns
527 an invalid build name.
528 @raises autoupdater.ChromiumOSError: If something goes wrong in the
529 checking update engine client status or applying an update.
530 @raises AutoservRunError: If the update_engine_client isn't present on
531 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700532
beeps5e8c45a2013-12-17 22:05:11 -0800533 """
Dan Shib795b5a2015-09-24 13:26:35 -0700534 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700535 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800536 logging.info('Not attempting an update, either %s is not running '
537 'chromeos or we cannot find enough information about '
538 'the host.', self.hostname)
539 return
540
Dan Shib795b5a2015-09-24 13:26:35 -0700541 if lsbrelease_utils.is_moblab():
542 logging.info('Not attempting an update, %s is running moblab.',
543 self.hostname)
544 return
545
Richard Barnette383ef9c2016-12-13 11:56:49 -0800546 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
J. Richard Barnette84895392015-04-30 12:31:01 -0700547 target_build_number = server_site_utils.ParseBuildName(
548 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800549 # For servo image staging, we want it as more widely distributed as
550 # possible, so that devservers' load can be evenly distributed. So use
551 # hostname instead of target_build as hash.
552 ds = dev_server.ImageServer.resolve(self.hostname,
553 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700554 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800555
556 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700557 status, current_build_number = self._check_for_reboot(updater)
558 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800559 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
560 logging.info('servo host %s already processing an update, update '
561 'engine client status=%s', self.hostname, status)
J. Richard Barnette84895392015-04-30 12:31:01 -0700562 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800563 logging.info('Using devserver url: %s to trigger update on '
564 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700565 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800566 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700567 ds.stage_artifacts(target_build,
568 artifacts=['full_payload'])
569 except Exception as e:
570 logging.error('Staging artifacts failed: %s', str(e))
571 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800572 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700573 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700574 # TODO(jrbarnette): This 'touch' is a gross hack
575 # to get us past crbug.com/613603. Once that
576 # bug is resolved, we should remove this code.
577 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700578 updater.trigger_update()
579 except autoupdater.RootFSUpdateError as e:
580 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800581 metrics.Counter('chromeos/autotest/servo/'
582 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700583 else:
584 trigger_download_status = 'passed'
585 logging.info('Triggered download and update %s for %s, '
586 'update engine currently in status %s',
587 trigger_download_status, self.hostname,
588 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800589 else:
590 logging.info('servo host %s does not require an update.',
591 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700592 update_pending = False
593
594 if update_pending and wait_for_update:
595 logging.info('Waiting for servo update to complete.')
596 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800597
598
Richard Barnette1edbb162016-11-01 11:47:50 -0700599 def verify(self, silent=False):
600 """Update the servo host and verify it's in a good state.
601
602 @param silent If true, suppress logging in `status.log`.
603 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700604 # TODO(jrbarnette) Old versions of beaglebone_servo include
Richard Barnette9a26ad62016-06-10 12:03:08 -0700605 # the powerd package. If you touch the .oobe_completed file
606 # (as we do to work around an update_engine problem), then
607 # powerd will eventually shut down the beaglebone for lack
608 # of (apparent) activity. Current versions of
Richard Barnette79d78c42016-05-25 09:31:21 -0700609 # beaglebone_servo don't have powerd, but until we can purge
610 # the lab of the old images, we need to make sure powerd
611 # isn't running.
612 self.run('stop powerd', ignore_status=True)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700613 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700614 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700615 except:
616 self.disconnect_servo()
617 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700618
619
Richard Barnette1edbb162016-11-01 11:47:50 -0700620 def repair(self, silent=False):
621 """Attempt to repair servo host.
622
623 @param silent If true, suppress logging in `status.log`.
624 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700625 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700626 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700627 except:
628 self.disconnect_servo()
629 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700630
631
Fang Dengd4fe7392013-09-20 12:18:21 -0700632 def has_power(self):
633 """Return whether or not the servo host is powered by PoE."""
634 # TODO(fdeng): See crbug.com/302791
635 # For now, assume all servo hosts in the lab have power.
636 return self.is_in_lab()
637
638
def power_cycle(self):
    """Power cycle this host through the RPM client, if it has power.

    When `has_power()` is false nothing is cycled; a message is
    logged and the method returns.

    @raises AutoservRepairError if it fails to power cycle the
                                servo host.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Translate transport/RPM failures into a repair error.
        failure = 'Power cycling %s failed: %s' % (self.hostname, e)
        raise hosts.AutoservRepairError(failure)
656
657
def get_servo(self):
    """Return the servo object cached on this host.

    @return: the value stored in `self._servo` (a servo.Servo object).
    """
    cached_servo = self._servo
    return cached_servo
664
665
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    '-servo' is appended to the first dot-separated label of the
    name; any remaining labels are carried over unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
677
678
def servo_host_is_up(servo_hostname):
    """Check whether a servo host answers a ping.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Strictly speaking this repeats the SSH ping performed early in
    # the servo proxy initialization code.  The difference is that this
    # ping gives up within a couple of seconds when it fails, whereas
    # an SSH ping needs 60 seconds to decide it has timed out.  That
    # long timeout is hit when the servo DNS name resolves but no host
    # is present at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    runner = ping_runner.PingRunner()
    result = runner.ping(config)
    return result.received > 0
696
697
def _map_afe_board_to_servo_board(afe_board):
    """Translate an AFE board name into the name servo expects.

    For servo's purposes many boards are identical to other boards.
    A board listed in the explicit map is replaced by its mapped
    name; otherwise the first matching known suffix (if any) is
    stripped from the name.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
    BOARD_MAP = {'gizmo': 'panther'}
    mapped_board = BOARD_MAP.get(afe_board)
    if mapped_board is None:
        # Only the first suffix that matches is removed; a name with
        # no known suffix is passed through untouched.
        stripped_names = (afe_board[0:-len(suffix)]
                          for suffix in KNOWN_SUFFIXES
                          if afe_board.endswith(suffix))
        mapped_board = next(stripped_names, afe_board)
    if mapped_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
    return mapped_board
721
722
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the servo port attribute is present but cannot be converted to
    an int, the error is logged and no servo args are returned for
    the DUT (the returned `servo_args` is `None`).

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an optional port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            # A loopback address is replaced with the configured
            # 'host_container_ip' value in the SSP moblab case.
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except (TypeError, ValueError):
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have just been reset to None because of a bad
        # port; don't try to store the serial number in that case.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        servo_board = afe_utils.get_board(dut_host)
        if servo_board is not None:
            servo_board = _map_afe_board_to_servo_board(servo_board)
            servo_args[SERVO_BOARD_ATTR] = servo_board
    return servo_args, is_in_lab
781
782
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Build a `ServoHost` for the servo attached to a DUT, if warranted.

    Depending on the parameters, the new `ServoHost` is checked with
    either `repair()` or `verify()`:
      * A non-`None` `servo_args` means a servo host must be created,
        and it must be checked with `repair()`.
      * Otherwise, when a servo exists in the lab and `try_lab_servo`
        is true:
          * With `try_servo_repair` true, the servo host is created
            and checked with `repair()`.
          * With `try_servo_repair` false, the servo host is created
            and checked with `verify()`, but only if the servo
            responds to `ping`.

    When `servo_args` was not `None`, repair failure exceptions are
    passed back to the caller; in every other case exceptions are
    logged and then discarded.  Passing `servo_args` only happens when
    we're called from a test (not special task) control file that has
    an explicit dependency on servo.  In that case repair is required
    not to write to `status.log`, to avoid polluting test results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    A servo host is described by a host name, port number, and DUT
    board.  These come from one of the following sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut              An instance of `Host` from which to take
                            servo parameters (if available).
    @param servo_args       A dictionary with servo parameters to use if
                            they can't be found from `dut`.  If this
                            argument is supplied, unrepaired exceptions
                            from `verify()` will be passed back to the
                            caller.
    @param try_lab_servo    If not true, servo host creation will be
                            skipped unless otherwise required by the
                            caller.
    @param try_servo_repair If true, check a servo host with
                            `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    # Caller-supplied args signal a hard dependency on servo.
    servo_dependency = servo_args is not None
    is_in_lab = False
    if dut is not None and (try_lab_servo or servo_dependency):
        dut_servo_args, is_in_lab = _get_standard_servo_args(dut)
        # Data found through the DUT wins over the caller's args.
        if dut_servo_args is not None:
            servo_args = dut_servo_args
    if servo_args is None:
        return None

    # Only the plain verify() path is gated on the servo answering a ping.
    if not (servo_dependency or try_servo_repair):
        if not servo_host_is_up(servo_args[SERVO_HOST_ATTR]):
            return None

    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    # repair() already covers everything verify() does, so exactly
    # one of the two is called below, never both.
    if servo_dependency:
        # Failures propagate to the caller on this path.
        newhost.repair(silent=True)
        return newhost

    try:
        if try_servo_repair:
            newhost.repair()
        else:
            newhost.verify()
    except Exception:
        if try_servo_repair:
            operation = 'repair'
        else:
            operation = 'verification'
        logging.exception('Servo %s failed for %s',
                          operation, newhost.hostname)
    return newhost