blob: e6461f6137d477979854f6794cc24deae3954dbc [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
Fang Deng5d518f42013-08-02 14:04:32 -070015import xmlrpclib
16
17from autotest_lib.client.bin import utils
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070018from autotest_lib.client.common_lib import control_data
Fang Deng5d518f42013-08-02 14:04:32 -070019from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070021from autotest_lib.client.common_lib import host_states
Richard Barnette9a26ad62016-06-10 12:03:08 -070022from autotest_lib.client.common_lib import hosts
Dan Shi0942b1d2015-03-31 11:07:00 -070023from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080024from autotest_lib.client.common_lib.cros import autoupdater
25from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070026from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070027from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000028from autotest_lib.client.cros import constants as client_constants
Richard Barnettee519dcd2016-08-15 17:37:17 -070029from autotest_lib.server import afe_utils
beeps5e8c45a2013-12-17 22:05:11 -080030from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080031from autotest_lib.server.cros import dnsname_mangler
Simran Basi0739d682015-02-25 16:22:56 -080032from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070033from autotest_lib.server.cros.dynamic_suite import control_file_getter
Richard Barnette9a26ad62016-06-10 12:03:08 -070034from autotest_lib.server.cros.servo import servo
35from autotest_lib.server.hosts import servo_repair
Prathmesh Prabhuc2c6d542018-04-20 14:28:45 -070036from autotest_lib.server.hosts import base_classes
Fang Deng5d518f42013-08-02 14:04:32 -070037from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070038from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070039
Dan Shi5e2efb72017-02-07 11:40:23 -080040try:
41 from chromite.lib import metrics
42except ImportError:
43 metrics = utils.metrics_mock
44
Fang Deng5d518f42013-08-02 14:04:32 -070045
# Names of the host attributes in the database that describe the servo
# connected to a DUT: the servo host name, the servod port, the board
# the servo is attached to, and the servo's serial number.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

# Shorthand for the process-wide global configuration object.
_CONFIG = global_config.global_config

# Whether servod on a remote servo host is reached through an ssh tunnel
# instead of a direct XML-RPC connection (see
# ServoHost.get_servod_server_proxy()).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS',
        'enable_ssh_tunnel_for_servo',
        type=bool,
        default=False)

# Directory searched for control files when scheduling the servo host
# reboot job.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER',
        'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs used to perform a synchronized servo host reboot.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
Fang Deng5d518f42013-08-02 14:04:32 -070063
Fang Deng5d518f42013-08-02 14:04:32 -070064class ServoHost(ssh_host.SSHHost):
65 """Host class for a host that controls a servo, e.g. beaglebone."""
66
Richard Barnette9a26ad62016-06-10 12:03:08 -070067 DEFAULT_PORT = 9999
68
Dan Shie5b3c512014-08-21 12:12:09 -070069 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070070 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070071
xixuan6cf6d2f2016-01-29 15:29:00 -080072 # Ready test function
73 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070074
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070075 REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
76
Fang Deng5d518f42013-08-02 14:04:32 -070077
Richard Barnette17bfc6c2016-08-04 18:41:43 -070078 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070079 servo_port=DEFAULT_PORT, servo_board=None,
Kevin Cheng643ce8a2016-09-15 15:42:12 -070080 servo_serial=None, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070081 """Initialize a ServoHost instance.
82
83 A ServoHost instance represents a host that controls a servo.
84
85 @param servo_host: Name of the host where the servod process
86 is running.
87 @param servo_port: Port the servod process is listening on.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070088 @param servo_board: Board that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -080089 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
90 to None, for which utils.host_is_in_lab_zone will be
91 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -070092
93 """
94 super(ServoHost, self)._initialize(hostname=servo_host,
95 *args, **dargs)
Richard Barnettee519dcd2016-08-15 17:37:17 -070096 self.servo_port = servo_port
97 self.servo_board = servo_board
Kevin Cheng643ce8a2016-09-15 15:42:12 -070098 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -070099 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -0700100 self._repair_strategy = (
101 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700102 self._is_localhost = (self.hostname == 'localhost')
103 if self._is_localhost:
104 self._is_in_lab = False
105 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800106 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
107 else:
108 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800109
Richard Barnettee519dcd2016-08-15 17:37:17 -0700110 # Commands on the servo host must be run by the superuser.
111 # Our account on a remote host is root, but if our target is
112 # localhost then we might be running unprivileged. If so,
113 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700114 if self._is_localhost:
115 self._sudo_required = utils.system_output('id -u') != '0'
116 else:
117 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700118
Richard Barnette9a26ad62016-06-10 12:03:08 -0700119
120 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700121 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700122
123 Initializes `self._servo` and then verifies that all network
124 connections are working. This will create an ssh tunnel if
125 it's required.
126
127 As a side effect of testing the connection, all signals on the
128 target servo are reset to default values, and the USB stick is
129 set to the neutral (off) position.
130 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700131 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700132 timeout, _ = retry.timeout(
133 servo_obj.initialize_dut,
134 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
135 if timeout:
136 raise hosts.AutoservVerifyError(
137 'Servo initialize timed out.')
138 self._servo = servo_obj
139
140
141 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700142 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700143
144 If we've previously successfully connected to our servo,
145 disconnect any established ssh tunnel, and set `self._servo`
146 back to `None`.
147 """
148 if self._servo:
149 # N.B. This call is safe even without a tunnel:
150 # rpc_server_tracker.disconnect() silently ignores
151 # unknown ports.
152 self.rpc_server_tracker.disconnect(self.servo_port)
153 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700154
155
156 def is_in_lab(self):
157 """Check whether the servo host is a lab device.
158
159 @returns: True if the servo host is in Cros Lab, otherwise False.
160
161 """
162 return self._is_in_lab
163
164
165 def is_localhost(self):
166 """Checks whether the servo host points to localhost.
167
168 @returns: True if it points to localhost, otherwise False.
169
170 """
171 return self._is_localhost
172
173
174 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700175 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700176
177 @returns: An xmlrpclib.ServerProxy that is connected to the servod
178 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700179 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700180 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
181 return self.rpc_server_tracker.xmlrpc_connect(
182 None, self.servo_port,
183 ready_test_name=self.SERVO_READY_METHOD,
184 timeout_seconds=60)
185 else:
186 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
187 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700188
189
Richard Barnette9a26ad62016-06-10 12:03:08 -0700190 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800191 """Check if a servo host is running chromeos.
192
193 @return: True if the servo host is running chromeos.
194 False if it isn't, or we don't have enough information.
195 """
196 try:
197 result = self.run('grep -q CHROMEOS /etc/lsb-release',
198 ignore_status=True, timeout=10)
199 except (error.AutoservRunError, error.AutoservSSHTimeout):
200 return False
201 return result.exit_status == 0
202
203
Fang Deng5d518f42013-08-02 14:04:32 -0700204 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
Dean Liaoe3e75f62017-11-14 10:36:43 +0800205 connect_timeout=None, alive_interval=None,
206 alive_count_max=None, connection_attempts=None):
Fang Deng5d518f42013-08-02 14:04:32 -0700207 """Override default make_ssh_command to use tuned options.
208
209 Tuning changes:
210 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
211 connection failure. Consistency with remote_access.py.
212
213 - ServerAliveInterval=180; which causes SSH to ping connection every
214 180 seconds. In conjunction with ServerAliveCountMax ensures
215 that if the connection dies, Autotest will bail out quickly.
216
217 - ServerAliveCountMax=3; consistency with remote_access.py.
218
219 - ConnectAttempts=4; reduce flakiness in connection errors;
220 consistency with remote_access.py.
221
222 - UserKnownHostsFile=/dev/null; we don't care about the keys.
223
224 - SSH protocol forced to 2; needed for ServerAliveInterval.
225
226 @param user User name to use for the ssh connection.
227 @param port Port on the target host to use for ssh connection.
228 @param opts Additional options to the ssh command.
229 @param hosts_file Ignored.
230 @param connect_timeout Ignored.
231 @param alive_interval Ignored.
Dean Liaoe3e75f62017-11-14 10:36:43 +0800232 @param alive_count_max Ignored.
233 @param connection_attempts Ignored.
Fang Deng5d518f42013-08-02 14:04:32 -0700234
235 @returns: An ssh command with the requested settings.
236
237 """
Dean Liaoe3e75f62017-11-14 10:36:43 +0800238 options = ' '.join([opts, '-o Protocol=2'])
239 return super(ServoHost, self).make_ssh_command(
240 user=user, port=port, opts=options, hosts_file='/dev/null',
241 connect_timeout=30, alive_interval=180, alive_count_max=3,
242 connection_attempts=4)
Fang Deng5d518f42013-08-02 14:04:32 -0700243
244
245 def _make_scp_cmd(self, sources, dest):
246 """Format scp command.
247
248 Given a list of source paths and a destination path, produces the
249 appropriate scp command for encoding it. Remote paths must be
250 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
251 to allow additional ssh options.
252
253 @param sources: A list of source paths to copy from.
254 @param dest: Destination path to copy to.
255
256 @returns: An scp command that copies |sources| on local machine to
257 |dest| on the remote servo host.
258
259 """
260 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
261 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
262 return command % (self.master_ssh_option,
263 self.port, ' '.join(sources), dest)
264
265
266 def run(self, command, timeout=3600, ignore_status=False,
267 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800268 connect_timeout=30, ssh_failure_retry_ok=False,
269 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700270 """Run a command on the servo host.
271
272 Extends method `run` in SSHHost. If the servo host is a remote device,
273 it will call `run` in SSHost without changing anything.
274 If the servo host is 'localhost', it will call utils.system_output.
275
276 @param command: The command line string.
277 @param timeout: Time limit in seconds before attempting to
278 kill the running process. The run() function
279 will take a few seconds longer than 'timeout'
280 to complete if it has to kill the process.
281 @param ignore_status: Do not raise an exception, no matter
282 what the exit code of the command is.
283 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
284 @param connect_timeout: SSH connection timeout (in seconds)
285 Ignored if host is 'localhost'.
286 @param options: String with additional ssh command options
287 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800288 @param ssh_failure_retry_ok: when True and ssh connection failure is
289 suspected, OK to retry command (but not
290 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700291 @param stdin: Stdin to pass (a string) to the executed command.
292 @param verbose: Log the commands.
293 @param args: Sequence of strings to pass as arguments to command by
294 quoting them in " and escaping their contents if necessary.
295
296 @returns: A utils.CmdResult object.
297
298 @raises AutoservRunError if the command failed.
299 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
300 when servo host is not 'localhost'.
301
302 """
303 run_args = {'command': command, 'timeout': timeout,
304 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
305 'stderr_tee': stderr_tee, 'stdin': stdin,
306 'verbose': verbose, 'args': args}
307 if self.is_localhost():
308 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800309 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
310 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700311 try:
312 return utils.run(**run_args)
313 except error.CmdError as e:
314 logging.error(e)
315 raise error.AutoservRunError('command execution error',
316 e.result_obj)
317 else:
318 run_args['connect_timeout'] = connect_timeout
319 run_args['options'] = options
320 return super(ServoHost, self).run(**run_args)
321
322
Richard Barnette9a26ad62016-06-10 12:03:08 -0700323 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700324 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
325
326 @returns The version string in lsb-release, under attribute
327 CHROMEOS_RELEASE_VERSION.
328 """
329 lsb_release_content = self.run(
330 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
331 return lsbrelease_utils.get_chromeos_release_version(
332 lsb_release_content=lsb_release_content)
333
334
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700335 def get_attached_duts(self, afe):
336 """Gather a list of duts that use this servo host.
337
338 @param afe: afe instance.
339
340 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700341 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700342 return afe.get_hosts_by_attribute(
343 attribute=SERVO_HOST_ATTR, value=self.hostname)
344
345
346 def get_board(self):
347 """Determine the board for this servo host.
348
349 @returns a string representing this servo host's board.
350 """
351 return lsbrelease_utils.get_current_board(
352 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
353
354
355 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
356 """Choose which dut to schedule servo host reboot job.
357
358 We'll want a semi-deterministic way of selecting which host should be
359 scheduled for the servo host reboot job. For now we'll sort the
360 list with the expectation the dut list will stay consistent.
361 From there we'll grab the first dut that is available so we
362 don't schedule a job on a dut that will never run.
363
364 @param dut_list: List of the dut hostnames to choose from.
365 @param afe: Instance of the AFE.
366
367 @return hostname of dut to schedule job on.
368 """
369 afe_hosts = afe.get_hosts(dut_list)
370 afe_hosts.sort()
371 for afe_host in afe_hosts:
372 if afe_host.status not in host_states.UNAVAILABLE_STATES:
373 return afe_host.hostname
374 # If they're all unavailable, just return the first sorted dut.
375 dut_list.sort()
376 return dut_list[0]
377
378
379 def _sync_job_scheduled_for_duts(self, dut_list, afe):
380 """Checks if a synchronized reboot has been scheduled for these duts.
381
382 Grab all the host queue entries that aren't completed for the duts and
383 see if any of them have the expected job name.
384
385 @param dut_list: List of duts to check on.
386 @param afe: Instance of the AFE.
387
388 @returns True if the job is scheduled, False otherwise.
389 """
390 afe_hosts = afe.get_hosts(dut_list)
391 for afe_host in afe_hosts:
392 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
393 for hqe in hqes:
394 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700395 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
396 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700397 return True
398 return False
399
400
Kevin Cheng55265902016-10-19 12:46:50 -0700401 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700402 """Schedule a job to reboot the servo host.
403
404 When we schedule a job, it will create a ServoHost object which will
405 go through this entire flow of checking if a reboot is needed and
406 trying to schedule it. There is probably a better approach to setting
407 up a synchronized reboot but I'm coming up short on better ideas so I
408 apologize for this circus show.
409
Kevin Cheng55265902016-10-19 12:46:50 -0700410 @param dut_list: List of duts that need to be locked.
411 @param afe: Instance of afe.
412 @param force_reboot: Boolean to indicate if a forced reboot should be
413 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700414 """
415 # If we've already scheduled job on a dut, we're done here.
416 if self._sync_job_scheduled_for_duts(dut_list, afe):
417 return
418
419 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700420 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
421 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700422 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
423 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700424 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700425 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700426 try:
427 afe.create_job(control_file=control_file, name=test,
428 control_type=control_type, hosts=[dut])
429 except Exception as e:
430 # Sometimes creating the job will raise an exception. We'll log it
431 # but we don't want to fail because of it.
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700432 logging.exception('Scheduling reboot job failed due to Exception.')
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700433
434
435 def reboot(self, *args, **dargs):
436 """Reboot using special servo host reboot command."""
437 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
438 *args, **dargs)
439
440
441 def _check_for_reboot(self, updater):
442 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700443
444 If the host has successfully downloaded and finalized a new
445 build, reboot.
446
447 @param updater: a ChromiumOSUpdater instance for checking
448 whether reboot is needed.
449 @return Return a (status, build) tuple reflecting the
450 update_engine status and current build of the host
451 at the end of the call.
452 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700453 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700454 status = updater.check_update_status()
455 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700456 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700457 afe = frontend_wrappers.RetryingAFE(
458 timeout_min=5, delay_sec=10,
459 server=server_site_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700460 dut_list = self.get_attached_duts(afe)
461 logging.info('servo host has the following duts: %s', dut_list)
462 if len(dut_list) > 1:
463 logging.info('servo host has multiple duts, scheduling '
464 'synchronized reboot')
465 self.schedule_synchronized_reboot(dut_list, afe)
466 return status, current_build_number
467
468 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700469 self.hostname, current_build_number)
470 # Tell the reboot() call not to wait for completion.
471 # Otherwise, the call will log reboot failure if servo does
472 # not come back. The logged reboot failure will lead to
473 # test job failure. If the test does not require servo, we
474 # don't want servo failure to fail the test with error:
475 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700476 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700477
478 # We told the reboot() call not to wait, but we need to wait
479 # for the reboot before we continue. Alas. The code from
480 # here below is basically a copy of Host.wait_for_restart(),
481 # with the logging bits ripped out, so that they can't cause
482 # the failure logging problem described above.
483 #
484 # The black stain that this has left on my soul can never be
485 # erased.
486 old_boot_id = self.get_boot_id()
487 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
488 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
489 old_boot_id=old_boot_id):
490 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700491 'servo host %s failed to shut down.' %
492 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700493 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700494 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700495 status = updater.check_update_status()
496 logging.info('servo host %s back from reboot, with build %s',
497 self.hostname, current_build_number)
498 else:
499 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700500 'servo host %s failed to come back from reboot.' %
501 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700502 return status, current_build_number
503
504
Richard Barnette3a7697f2016-04-20 11:33:27 -0700505 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800506 """Update the image on the servo host, if needed.
507
J. Richard Barnette84895392015-04-30 12:31:01 -0700508 This method recognizes the following cases:
509 * If the Host is not running Chrome OS, do nothing.
510 * If a previously triggered update is now complete, reboot
511 to the new version.
512 * If the host is processing a previously triggered update,
513 do nothing.
514 * If the host is running a version of Chrome OS different
515 from the default for servo Hosts, trigger an update, but
516 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800517
Richard Barnette3a7697f2016-04-20 11:33:27 -0700518 @param wait_for_update If an update needs to be applied and
519 this is true, then don't return until the update is
520 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800521 @raises dev_server.DevServerException: If all the devservers are down.
522 @raises site_utils.ParseBuildNameException: If the devserver returns
523 an invalid build name.
524 @raises autoupdater.ChromiumOSError: If something goes wrong in the
525 checking update engine client status or applying an update.
526 @raises AutoservRunError: If the update_engine_client isn't present on
527 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700528
beeps5e8c45a2013-12-17 22:05:11 -0800529 """
Dan Shib795b5a2015-09-24 13:26:35 -0700530 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700531 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800532 logging.info('Not attempting an update, either %s is not running '
533 'chromeos or we cannot find enough information about '
534 'the host.', self.hostname)
535 return
536
Dan Shib795b5a2015-09-24 13:26:35 -0700537 if lsbrelease_utils.is_moblab():
538 logging.info('Not attempting an update, %s is running moblab.',
539 self.hostname)
540 return
541
Richard Barnette383ef9c2016-12-13 11:56:49 -0800542 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
J. Richard Barnette84895392015-04-30 12:31:01 -0700543 target_build_number = server_site_utils.ParseBuildName(
544 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800545 # For servo image staging, we want it as more widely distributed as
546 # possible, so that devservers' load can be evenly distributed. So use
547 # hostname instead of target_build as hash.
548 ds = dev_server.ImageServer.resolve(self.hostname,
549 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700550 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800551
552 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700553 status, current_build_number = self._check_for_reboot(updater)
554 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800555 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
556 logging.info('servo host %s already processing an update, update '
557 'engine client status=%s', self.hostname, status)
Allen Li66aa2542017-06-26 15:26:27 -0700558 elif status == autoupdater.UPDATER_NEED_REBOOT:
559 return
J. Richard Barnette84895392015-04-30 12:31:01 -0700560 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800561 logging.info('Using devserver url: %s to trigger update on '
562 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700563 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800564 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700565 ds.stage_artifacts(target_build,
566 artifacts=['full_payload'])
567 except Exception as e:
568 logging.error('Staging artifacts failed: %s', str(e))
569 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800570 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700571 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700572 # TODO(jrbarnette): This 'touch' is a gross hack
573 # to get us past crbug.com/613603. Once that
574 # bug is resolved, we should remove this code.
575 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700576 updater.trigger_update()
577 except autoupdater.RootFSUpdateError as e:
578 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800579 metrics.Counter('chromeos/autotest/servo/'
580 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700581 else:
582 trigger_download_status = 'passed'
583 logging.info('Triggered download and update %s for %s, '
584 'update engine currently in status %s',
585 trigger_download_status, self.hostname,
586 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800587 else:
588 logging.info('servo host %s does not require an update.',
589 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700590 update_pending = False
591
592 if update_pending and wait_for_update:
593 logging.info('Waiting for servo update to complete.')
594 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800595
596
Richard Barnette1edbb162016-11-01 11:47:50 -0700597 def verify(self, silent=False):
598 """Update the servo host and verify it's in a good state.
599
600 @param silent If true, suppress logging in `status.log`.
601 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700602 # TODO(jrbarnette) Old versions of beaglebone_servo include
Richard Barnette9a26ad62016-06-10 12:03:08 -0700603 # the powerd package. If you touch the .oobe_completed file
604 # (as we do to work around an update_engine problem), then
605 # powerd will eventually shut down the beaglebone for lack
606 # of (apparent) activity. Current versions of
Richard Barnette79d78c42016-05-25 09:31:21 -0700607 # beaglebone_servo don't have powerd, but until we can purge
608 # the lab of the old images, we need to make sure powerd
609 # isn't running.
610 self.run('stop powerd', ignore_status=True)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700611 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700612 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700613 except:
614 self.disconnect_servo()
615 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700616
617
Richard Barnette1edbb162016-11-01 11:47:50 -0700618 def repair(self, silent=False):
619 """Attempt to repair servo host.
620
621 @param silent If true, suppress logging in `status.log`.
622 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700623 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700624 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700625 except:
626 self.disconnect_servo()
627 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700628
629
Fang Dengd4fe7392013-09-20 12:18:21 -0700630 def has_power(self):
631 """Return whether or not the servo host is powered by PoE."""
632 # TODO(fdeng): See crbug.com/302791
633 # For now, assume all servo hosts in the lab have power.
634 return self.is_in_lab()
635
636
def power_cycle(self):
    """Power cycle this host through the RPM client, if it has power.

    When `has_power()` is false, nothing is done beyond logging that
    the power cycle was skipped.

    @raises AutoservRepairError if it fails to power cycle the
            servo host.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Fold the transport and RPM failures into the single
        # exception type named in the docstring.
        failure = 'Power cycling %s failed: %s' % (self.hostname, e)
        raise hosts.AutoservRepairError(failure)
654
655
def get_servo(self):
    """Return the servo object cached on this host.

    @return: a servo.Servo object (whatever is stored in
             `self._servo`).
    """
    cached_servo = self._servo
    return cached_servo
662
663
def make_servo_hostname(dut_hostname):
    """Derive a servo hostname from the hostname of its DUT.

    The suffix `-servo` is appended to the first dot-separated label;
    any remaining labels are left untouched.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() splits at the first '.' only, which is all we need:
    # everything after the first label is carried over verbatim.
    first_label, dot, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + dot + remainder
675
676
def servo_host_is_up(servo_hostname):
    """Report whether a servo host answers a network ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False
            otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # of seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    result = ping_runner.PingRunner().ping(config)
    return result.received > 0
694
695
Richard Barnettee519dcd2016-08-15 17:37:17 -0700696def _map_afe_board_to_servo_board(afe_board):
697 """Map a board we get from the AFE to a servo appropriate value.
698
699 Many boards are identical to other boards for servo's purposes.
700 This function makes that mapping.
701
702 @param afe_board string board name received from AFE.
703 @return board we expect servo to have.
704
705 """
706 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
707 BOARD_MAP = {'gizmo': 'panther'}
708 mapped_board = afe_board
709 if afe_board in BOARD_MAP:
710 mapped_board = BOARD_MAP[afe_board]
711 else:
712 for suffix in KNOWN_SUFFIXES:
713 if afe_board.endswith(suffix):
714 mapped_board = afe_board[0:-len(suffix)]
715 break
716 if mapped_board != afe_board:
717 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
718 return mapped_board
719
720
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the host attribute is missing, a hostname derived from the DUT's
    own hostname is used instead, but only when this is not a moblab,
    the DUT hostname is not an IP address, and the derived name is in
    the lab zone.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an optional port
            (plus serial and board when available), and an `is_in_lab`
            flag indicating whether this is in the CrOS test lab, or
            some different environment.  `servo_args` is `None` when no
            servo could be determined, or when the port attribute is
            not a valid integer.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        # Inside a container the moblab flag comes from the SSP config
        # section rather than from probing the local system.
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            # A loopback address is replaced with the configured
            # `host_container_ip` when running as SSP on moblab.
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            try:
                servo_port = attrs[SERVO_PORT_ATTR]
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except ValueError:
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # A bad port leaves `servo_args` as None; storing the serial in
        # that case would raise TypeError, so skip it.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
779
780
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Build a ServoHost for a DUT when the circumstances call for one.

    Whether a `ServoHost` is created, and how it is checked, depends
    on the arguments:
      * A non-`None` `servo_args` means the caller depends on servo:
        the host is always created and checked with a silent
        `repair()`, and any exception from that repair propagates.
      * Otherwise, a host is only considered when `try_lab_servo` is
        true and servo parameters can be found for `dut`:
          * With `try_servo_repair` true, the host is created and
            checked with `repair()`.
          * Without it, the host is created only if the servo answers
            a ping, and is then checked with `verify()`.
        On this path, exceptions from the check are logged and then
        discarded; the host object is still returned.

    The servo-dependency case arises when we're called from a test
    (not special task) control file that has an explicit dependency on
    servo.  There, repair must not write to `status.log`, so as to
    avoid polluting test results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Servo parameters (host name, port number, DUT board) come from
    one of these sources, in order of priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, exceptions from
                          `repair()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    caller_requires_servo = servo_args is not None
    in_lab = False

    if dut is not None and (try_lab_servo or caller_requires_servo):
        dut_servo_args, in_lab = _get_standard_servo_args(dut)
        # Parameters found via the DUT win over the caller's dict.
        if dut_servo_args is not None:
            servo_args = dut_servo_args

    if servo_args is None:
        return None

    # The quick ping pre-check only gates the verify-only path.
    ping_gates_creation = not (caller_requires_servo or try_servo_repair)
    if ping_gates_creation and not servo_host_is_up(
            servo_args[SERVO_HOST_ATTR]):
        return None

    servo_host = ServoHost(is_in_lab=in_lab, **servo_args)
    base_classes.send_creation_metric(servo_host)

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if caller_requires_servo:
        servo_host.repair(silent=True)
        return servo_host

    operation = 'repair' if try_servo_repair else 'verification'
    try:
        if try_servo_repair:
            servo_host.repair()
        else:
            servo_host.verify()
    except Exception:
        logging.exception('Servo %s failed for %s',
                          operation, servo_host.hostname)
    return servo_host