blob: d9e880d932c57bf4229bb73ab0b7709b46d08693 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
import httplib
import logging
import operator
import os
import socket
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.cros import constants as client_constants
from autotest_lib.server import afe_utils
from autotest_lib.server import site_utils as server_utils
from autotest_lib.server.cros import autoupdater
from autotest_lib.server.cros import dnsname_mangler
from autotest_lib.server.cros.dynamic_suite import control_file_getter
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import servo_repair
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock
44
Fang Deng5d518f42013-08-02 14:04:32 -070045
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
# Model is inferred from host labels.
SERVO_MODEL_ATTR = 'servo_model'
SERVO_SERIAL_ATTR = 'servo_serial'
# Note that SERVO_MODEL_ATTR is not listed in this tuple.
SERVO_ATTR_KEYS = (
        SERVO_BOARD_ATTR,
        SERVO_HOST_ATTR,
        SERVO_PORT_ATTR,
        SERVO_SERIAL_ATTR,
)

_CONFIG = global_config.global_config
# When true, servod on a remote servo host is reached through an ssh tunnel
# rather than by a direct HTTP connection (see get_servod_server_proxy()).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory searched for the control files of the servo host reboot tests.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs used to perform a synchronized servo host reboot.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
Fang Deng5d518f42013-08-02 14:04:32 -070071
Fang Deng5d518f42013-08-02 14:04:32 -070072class ServoHost(ssh_host.SSHHost):
73 """Host class for a host that controls a servo, e.g. beaglebone."""
74
Raul E Rangel52ca2e82018-07-03 14:10:14 -060075 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070076
Dan Shie5b3c512014-08-21 12:12:09 -070077 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070078 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070079
xixuan6cf6d2f2016-01-29 15:29:00 -080080 # Ready test function
81 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070082
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070083 REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
84
Fang Deng5d518f42013-08-02 14:04:32 -070085
Richard Barnette17bfc6c2016-08-04 18:41:43 -070086 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070087 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -070088 servo_model=None, servo_serial=None, is_in_lab=None,
89 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070090 """Initialize a ServoHost instance.
91
92 A ServoHost instance represents a host that controls a servo.
93
94 @param servo_host: Name of the host where the servod process
95 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -060096 @param servo_port: Port the servod process is listening on. Defaults
97 to the SERVOD_PORT environment variable if set,
98 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070099 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700100 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800101 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
102 to None, for which utils.host_is_in_lab_zone will be
103 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700104
105 """
106 super(ServoHost, self)._initialize(hostname=servo_host,
107 *args, **dargs)
Richard Barnette42f4db92018-08-23 15:05:15 -0700108 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700109 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700110 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700111 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -0700112 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -0700113 self._repair_strategy = (
114 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700115 self._is_localhost = (self.hostname == 'localhost')
116 if self._is_localhost:
117 self._is_in_lab = False
118 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800119 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
120 else:
121 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800122
Richard Barnettee519dcd2016-08-15 17:37:17 -0700123 # Commands on the servo host must be run by the superuser.
124 # Our account on a remote host is root, but if our target is
125 # localhost then we might be running unprivileged. If so,
126 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700127 if self._is_localhost:
128 self._sudo_required = utils.system_output('id -u') != '0'
129 else:
130 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700131
Richard Barnette9a26ad62016-06-10 12:03:08 -0700132
133 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700134 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700135
136 Initializes `self._servo` and then verifies that all network
137 connections are working. This will create an ssh tunnel if
138 it's required.
139
140 As a side effect of testing the connection, all signals on the
141 target servo are reset to default values, and the USB stick is
142 set to the neutral (off) position.
143 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700144 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700145 timeout, _ = retry.timeout(
146 servo_obj.initialize_dut,
147 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
148 if timeout:
149 raise hosts.AutoservVerifyError(
150 'Servo initialize timed out.')
151 self._servo = servo_obj
152
153
154 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700155 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700156
157 If we've previously successfully connected to our servo,
158 disconnect any established ssh tunnel, and set `self._servo`
159 back to `None`.
160 """
161 if self._servo:
162 # N.B. This call is safe even without a tunnel:
163 # rpc_server_tracker.disconnect() silently ignores
164 # unknown ports.
165 self.rpc_server_tracker.disconnect(self.servo_port)
166 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700167
168
169 def is_in_lab(self):
170 """Check whether the servo host is a lab device.
171
172 @returns: True if the servo host is in Cros Lab, otherwise False.
173
174 """
175 return self._is_in_lab
176
177
178 def is_localhost(self):
179 """Checks whether the servo host points to localhost.
180
181 @returns: True if it points to localhost, otherwise False.
182
183 """
184 return self._is_localhost
185
186
187 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700188 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700189
190 @returns: An xmlrpclib.ServerProxy that is connected to the servod
191 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700192 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700193 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
194 return self.rpc_server_tracker.xmlrpc_connect(
195 None, self.servo_port,
196 ready_test_name=self.SERVO_READY_METHOD,
197 timeout_seconds=60)
198 else:
199 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
200 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700201
202
Richard Barnette9a26ad62016-06-10 12:03:08 -0700203 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800204 """Check if a servo host is running chromeos.
205
206 @return: True if the servo host is running chromeos.
207 False if it isn't, or we don't have enough information.
208 """
209 try:
210 result = self.run('grep -q CHROMEOS /etc/lsb-release',
211 ignore_status=True, timeout=10)
212 except (error.AutoservRunError, error.AutoservSSHTimeout):
213 return False
214 return result.exit_status == 0
215
216
Fang Deng5d518f42013-08-02 14:04:32 -0700217 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
Dean Liaoe3e75f62017-11-14 10:36:43 +0800218 connect_timeout=None, alive_interval=None,
219 alive_count_max=None, connection_attempts=None):
Fang Deng5d518f42013-08-02 14:04:32 -0700220 """Override default make_ssh_command to use tuned options.
221
222 Tuning changes:
223 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
224 connection failure. Consistency with remote_access.py.
225
226 - ServerAliveInterval=180; which causes SSH to ping connection every
227 180 seconds. In conjunction with ServerAliveCountMax ensures
228 that if the connection dies, Autotest will bail out quickly.
229
230 - ServerAliveCountMax=3; consistency with remote_access.py.
231
232 - ConnectAttempts=4; reduce flakiness in connection errors;
233 consistency with remote_access.py.
234
235 - UserKnownHostsFile=/dev/null; we don't care about the keys.
236
237 - SSH protocol forced to 2; needed for ServerAliveInterval.
238
239 @param user User name to use for the ssh connection.
240 @param port Port on the target host to use for ssh connection.
241 @param opts Additional options to the ssh command.
242 @param hosts_file Ignored.
243 @param connect_timeout Ignored.
244 @param alive_interval Ignored.
Dean Liaoe3e75f62017-11-14 10:36:43 +0800245 @param alive_count_max Ignored.
246 @param connection_attempts Ignored.
Fang Deng5d518f42013-08-02 14:04:32 -0700247
248 @returns: An ssh command with the requested settings.
249
250 """
Dean Liaoe3e75f62017-11-14 10:36:43 +0800251 options = ' '.join([opts, '-o Protocol=2'])
252 return super(ServoHost, self).make_ssh_command(
253 user=user, port=port, opts=options, hosts_file='/dev/null',
254 connect_timeout=30, alive_interval=180, alive_count_max=3,
255 connection_attempts=4)
Fang Deng5d518f42013-08-02 14:04:32 -0700256
257
258 def _make_scp_cmd(self, sources, dest):
259 """Format scp command.
260
261 Given a list of source paths and a destination path, produces the
262 appropriate scp command for encoding it. Remote paths must be
263 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
264 to allow additional ssh options.
265
266 @param sources: A list of source paths to copy from.
267 @param dest: Destination path to copy to.
268
269 @returns: An scp command that copies |sources| on local machine to
270 |dest| on the remote servo host.
271
272 """
273 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
274 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
275 return command % (self.master_ssh_option,
276 self.port, ' '.join(sources), dest)
277
278
279 def run(self, command, timeout=3600, ignore_status=False,
280 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800281 connect_timeout=30, ssh_failure_retry_ok=False,
282 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700283 """Run a command on the servo host.
284
285 Extends method `run` in SSHHost. If the servo host is a remote device,
286 it will call `run` in SSHost without changing anything.
287 If the servo host is 'localhost', it will call utils.system_output.
288
289 @param command: The command line string.
290 @param timeout: Time limit in seconds before attempting to
291 kill the running process. The run() function
292 will take a few seconds longer than 'timeout'
293 to complete if it has to kill the process.
294 @param ignore_status: Do not raise an exception, no matter
295 what the exit code of the command is.
296 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
297 @param connect_timeout: SSH connection timeout (in seconds)
298 Ignored if host is 'localhost'.
299 @param options: String with additional ssh command options
300 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800301 @param ssh_failure_retry_ok: when True and ssh connection failure is
302 suspected, OK to retry command (but not
303 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700304 @param stdin: Stdin to pass (a string) to the executed command.
305 @param verbose: Log the commands.
306 @param args: Sequence of strings to pass as arguments to command by
307 quoting them in " and escaping their contents if necessary.
308
309 @returns: A utils.CmdResult object.
310
311 @raises AutoservRunError if the command failed.
312 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
313 when servo host is not 'localhost'.
314
315 """
316 run_args = {'command': command, 'timeout': timeout,
317 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
318 'stderr_tee': stderr_tee, 'stdin': stdin,
319 'verbose': verbose, 'args': args}
320 if self.is_localhost():
321 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800322 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
323 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700324 try:
325 return utils.run(**run_args)
326 except error.CmdError as e:
327 logging.error(e)
328 raise error.AutoservRunError('command execution error',
329 e.result_obj)
330 else:
331 run_args['connect_timeout'] = connect_timeout
332 run_args['options'] = options
333 return super(ServoHost, self).run(**run_args)
334
335
Richard Barnette9a26ad62016-06-10 12:03:08 -0700336 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700337 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
338
339 @returns The version string in lsb-release, under attribute
340 CHROMEOS_RELEASE_VERSION.
341 """
342 lsb_release_content = self.run(
343 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
344 return lsbrelease_utils.get_chromeos_release_version(
345 lsb_release_content=lsb_release_content)
346
347
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700348 def get_attached_duts(self, afe):
349 """Gather a list of duts that use this servo host.
350
351 @param afe: afe instance.
352
353 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700354 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700355 return afe.get_hosts_by_attribute(
356 attribute=SERVO_HOST_ATTR, value=self.hostname)
357
358
359 def get_board(self):
360 """Determine the board for this servo host.
361
362 @returns a string representing this servo host's board.
363 """
364 return lsbrelease_utils.get_current_board(
365 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
366
367
368 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
369 """Choose which dut to schedule servo host reboot job.
370
371 We'll want a semi-deterministic way of selecting which host should be
372 scheduled for the servo host reboot job. For now we'll sort the
373 list with the expectation the dut list will stay consistent.
374 From there we'll grab the first dut that is available so we
375 don't schedule a job on a dut that will never run.
376
377 @param dut_list: List of the dut hostnames to choose from.
378 @param afe: Instance of the AFE.
379
380 @return hostname of dut to schedule job on.
381 """
382 afe_hosts = afe.get_hosts(dut_list)
383 afe_hosts.sort()
384 for afe_host in afe_hosts:
385 if afe_host.status not in host_states.UNAVAILABLE_STATES:
386 return afe_host.hostname
387 # If they're all unavailable, just return the first sorted dut.
388 dut_list.sort()
389 return dut_list[0]
390
391
392 def _sync_job_scheduled_for_duts(self, dut_list, afe):
393 """Checks if a synchronized reboot has been scheduled for these duts.
394
395 Grab all the host queue entries that aren't completed for the duts and
396 see if any of them have the expected job name.
397
398 @param dut_list: List of duts to check on.
399 @param afe: Instance of the AFE.
400
401 @returns True if the job is scheduled, False otherwise.
402 """
403 afe_hosts = afe.get_hosts(dut_list)
404 for afe_host in afe_hosts:
405 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
406 for hqe in hqes:
407 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700408 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
409 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700410 return True
411 return False
412
413
Kevin Cheng55265902016-10-19 12:46:50 -0700414 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700415 """Schedule a job to reboot the servo host.
416
417 When we schedule a job, it will create a ServoHost object which will
418 go through this entire flow of checking if a reboot is needed and
419 trying to schedule it. There is probably a better approach to setting
420 up a synchronized reboot but I'm coming up short on better ideas so I
421 apologize for this circus show.
422
Kevin Cheng55265902016-10-19 12:46:50 -0700423 @param dut_list: List of duts that need to be locked.
424 @param afe: Instance of afe.
425 @param force_reboot: Boolean to indicate if a forced reboot should be
426 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700427 """
428 # If we've already scheduled job on a dut, we're done here.
429 if self._sync_job_scheduled_for_duts(dut_list, afe):
430 return
431
432 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700433 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
434 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700435 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
436 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700437 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700438 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700439 try:
440 afe.create_job(control_file=control_file, name=test,
441 control_type=control_type, hosts=[dut])
442 except Exception as e:
443 # Sometimes creating the job will raise an exception. We'll log it
444 # but we don't want to fail because of it.
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700445 logging.exception('Scheduling reboot job failed due to Exception.')
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700446
447
448 def reboot(self, *args, **dargs):
449 """Reboot using special servo host reboot command."""
450 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
451 *args, **dargs)
452
453
454 def _check_for_reboot(self, updater):
455 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700456
457 If the host has successfully downloaded and finalized a new
458 build, reboot.
459
460 @param updater: a ChromiumOSUpdater instance for checking
461 whether reboot is needed.
462 @return Return a (status, build) tuple reflecting the
463 update_engine status and current build of the host
464 at the end of the call.
465 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700466 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700467 status = updater.check_update_status()
468 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700469 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700470 afe = frontend_wrappers.RetryingAFE(
471 timeout_min=5, delay_sec=10,
Prathmesh Prabhucbd5ebb2018-08-28 17:04:50 -0700472 server=server_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700473 dut_list = self.get_attached_duts(afe)
474 logging.info('servo host has the following duts: %s', dut_list)
475 if len(dut_list) > 1:
476 logging.info('servo host has multiple duts, scheduling '
477 'synchronized reboot')
478 self.schedule_synchronized_reboot(dut_list, afe)
479 return status, current_build_number
480
481 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700482 self.hostname, current_build_number)
483 # Tell the reboot() call not to wait for completion.
484 # Otherwise, the call will log reboot failure if servo does
485 # not come back. The logged reboot failure will lead to
486 # test job failure. If the test does not require servo, we
487 # don't want servo failure to fail the test with error:
488 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700489 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700490
491 # We told the reboot() call not to wait, but we need to wait
492 # for the reboot before we continue. Alas. The code from
493 # here below is basically a copy of Host.wait_for_restart(),
494 # with the logging bits ripped out, so that they can't cause
495 # the failure logging problem described above.
496 #
497 # The black stain that this has left on my soul can never be
498 # erased.
499 old_boot_id = self.get_boot_id()
500 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
501 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
502 old_boot_id=old_boot_id):
503 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700504 'servo host %s failed to shut down.' %
505 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700506 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700507 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700508 status = updater.check_update_status()
509 logging.info('servo host %s back from reboot, with build %s',
510 self.hostname, current_build_number)
511 else:
512 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700513 'servo host %s failed to come back from reboot.' %
514 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700515 return status, current_build_number
516
517
Richard Barnette3a7697f2016-04-20 11:33:27 -0700518 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800519 """Update the image on the servo host, if needed.
520
J. Richard Barnette84895392015-04-30 12:31:01 -0700521 This method recognizes the following cases:
522 * If the Host is not running Chrome OS, do nothing.
523 * If a previously triggered update is now complete, reboot
524 to the new version.
525 * If the host is processing a previously triggered update,
526 do nothing.
527 * If the host is running a version of Chrome OS different
528 from the default for servo Hosts, trigger an update, but
529 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800530
Richard Barnette3a7697f2016-04-20 11:33:27 -0700531 @param wait_for_update If an update needs to be applied and
532 this is true, then don't return until the update is
533 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800534 @raises dev_server.DevServerException: If all the devservers are down.
535 @raises site_utils.ParseBuildNameException: If the devserver returns
536 an invalid build name.
beeps5e8c45a2013-12-17 22:05:11 -0800537 @raises AutoservRunError: If the update_engine_client isn't present on
538 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700539
beeps5e8c45a2013-12-17 22:05:11 -0800540 """
Dan Shib795b5a2015-09-24 13:26:35 -0700541 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700542 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800543 logging.info('Not attempting an update, either %s is not running '
544 'chromeos or we cannot find enough information about '
545 'the host.', self.hostname)
546 return
547
Dan Shib795b5a2015-09-24 13:26:35 -0700548 if lsbrelease_utils.is_moblab():
549 logging.info('Not attempting an update, %s is running moblab.',
550 self.hostname)
551 return
552
Richard Barnette383ef9c2016-12-13 11:56:49 -0800553 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
Prathmesh Prabhucbd5ebb2018-08-28 17:04:50 -0700554 target_build_number = server_utils.ParseBuildName(
J. Richard Barnette84895392015-04-30 12:31:01 -0700555 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800556 # For servo image staging, we want it as more widely distributed as
557 # possible, so that devservers' load can be evenly distributed. So use
558 # hostname instead of target_build as hash.
559 ds = dev_server.ImageServer.resolve(self.hostname,
560 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700561 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800562
563 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700564 status, current_build_number = self._check_for_reboot(updater)
565 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800566 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
567 logging.info('servo host %s already processing an update, update '
568 'engine client status=%s', self.hostname, status)
Allen Li66aa2542017-06-26 15:26:27 -0700569 elif status == autoupdater.UPDATER_NEED_REBOOT:
570 return
J. Richard Barnette84895392015-04-30 12:31:01 -0700571 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800572 logging.info('Using devserver url: %s to trigger update on '
573 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700574 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800575 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700576 ds.stage_artifacts(target_build,
577 artifacts=['full_payload'])
578 except Exception as e:
579 logging.error('Staging artifacts failed: %s', str(e))
580 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800581 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700582 try:
583 updater.trigger_update()
584 except autoupdater.RootFSUpdateError as e:
585 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800586 metrics.Counter('chromeos/autotest/servo/'
587 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700588 else:
589 trigger_download_status = 'passed'
590 logging.info('Triggered download and update %s for %s, '
591 'update engine currently in status %s',
592 trigger_download_status, self.hostname,
593 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800594 else:
595 logging.info('servo host %s does not require an update.',
596 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700597 update_pending = False
598
599 if update_pending and wait_for_update:
600 logging.info('Waiting for servo update to complete.')
601 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800602
603
Richard Barnette1edbb162016-11-01 11:47:50 -0700604 def verify(self, silent=False):
605 """Update the servo host and verify it's in a good state.
606
607 @param silent If true, suppress logging in `status.log`.
608 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700609 message = 'Beginning verify for servo host %s port %s serial %s'
610 message %= (self.hostname, self.servo_port, self.servo_serial)
611 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700612 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700613 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700614 except:
615 self.disconnect_servo()
616 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700617
618
Richard Barnette1edbb162016-11-01 11:47:50 -0700619 def repair(self, silent=False):
620 """Attempt to repair servo host.
621
622 @param silent If true, suppress logging in `status.log`.
623 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700624 message = 'Beginning repair for servo host %s port %s serial %s'
625 message %= (self.hostname, self.servo_port, self.servo_serial)
626 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700627 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700628 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700629 except:
630 self.disconnect_servo()
631 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700632
633
Fang Dengd4fe7392013-09-20 12:18:21 -0700634 def has_power(self):
635 """Return whether or not the servo host is powered by PoE."""
636 # TODO(fdeng): See crbug.com/302791
637 # For now, assume all servo hosts in the lab have power.
638 return self.is_in_lab()
639
640
def power_cycle(self):
    """Power cycle this host through the RPM client, lab devices only.

    When `has_power()` is false nothing is cycled and an informational
    message is logged instead.

    @raises AutoservRepairError if the power cycle request fails.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Report transport and RPM failures alike as a repair error.
        failure = 'Power cycling %s failed: %s' % (self.hostname, e)
        raise hosts.AutoservRepairError(failure)
658
659
def get_servo(self):
    """Return the servo.Servo object cached on this host.

    @return: the value currently stored in `self._servo`.
    """
    cached_servo = self._servo
    return cached_servo
666
667
def close(self):
    """Stop UART logging and close the host object.

    When a servo object is attached, its UART streams are dumped into
    the job's result directory (if there is a job) and the servo is
    closed.  The parent class's `close()` is then called.

    Cleanup is guaranteed: a failure while dumping the UART streams
    no longer prevents the servo from being closed, and a failure in
    either servo step no longer prevents the host object itself from
    being closed.  Exceptions still propagate to the caller.
    """
    try:
        if self._servo:
            try:
                # In some cases when we run as lab-tools, the job
                # object is None.
                if self.job:
                    self._servo.dump_uart_streams(self.job.resultdir)
            finally:
                self._servo.close()
    finally:
        super(ServoHost, self).close()
677
678
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The first dot-separated label of the DUT hostname gets a '-servo'
    suffix; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    first_label, dot, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + dot + remainder
690
691
def servo_host_is_up(servo_hostname):
    """Check whether a servo host answers ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False
            otherwise.
    """
    # Strictly speaking this repeats the SSH ping performed early in
    # the servo proxy initialization code.  The difference is speed:
    # this ping gives up within a couple of seconds when it fails,
    # whereas an SSH ping needs 60 seconds to decide it has timed out.
    # That slow path is hit when the servo DNS name resolves but no
    # host is present at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    ping_result = ping_runner.PingRunner().ping(config)
    return ping_result.received > 0
709
710
Richard Barnettee519dcd2016-08-15 17:37:17 -0700711def _map_afe_board_to_servo_board(afe_board):
712 """Map a board we get from the AFE to a servo appropriate value.
713
714 Many boards are identical to other boards for servo's purposes.
715 This function makes that mapping.
716
717 @param afe_board string board name received from AFE.
718 @return board we expect servo to have.
719
720 """
721 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
722 BOARD_MAP = {'gizmo': 'panther'}
723 mapped_board = afe_board
724 if afe_board in BOARD_MAP:
725 mapped_board = BOARD_MAP[afe_board]
726 else:
727 for suffix in KNOWN_SUFFIXES:
728 if afe_board.endswith(suffix):
729 mapped_board = afe_board[0:-len(suffix)]
730 break
731 if mapped_board != afe_board:
732 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
733 return mapped_board
734
735
def get_servo_args_for_host(dut_host):
    """Collect the servo parameters stored for a given DUT.

    The DUT's host info attributes are filtered down to the servo
    related keys, the port (when present) is converted to an int, and
    the board and model are added when the host info provides them.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return `servo_args` dict with host and an optional port, or
            `None` when no servo host attribute is available or the
            stored port is not a valid integer.
    """
    info = dut_host.host_info_store.get()

    servo_args = {}
    for key, value in info.attributes.iteritems():
        if key in SERVO_ATTR_KEYS:
            servo_args[key] = value

    if SERVO_PORT_ATTR in servo_args:
        raw_port = servo_args[SERVO_PORT_ATTR]
        try:
            servo_args[SERVO_PORT_ATTR] = int(raw_port)
        except ValueError:
            logging.error('servo port is not an int: %s', raw_port)
            # An invalid port must not be used: dropping the host
            # attribute makes the final check below return None.
            servo_args.pop(SERVO_HOST_ATTR, None)

    if info.board:
        servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                info.board)
    if info.model:
        servo_args[SERVO_MODEL_ATTR] = info.model

    if SERVO_HOST_ATTR not in servo_args:
        return None
    return servo_args
Richard Barnetteea3e4602016-06-10 12:36:41 -0700761
762
def _tweak_args_for_ssp_moblab(servo_args):
    """Replace a loopback servo host with the configured container IP.

    When the servo host in `servo_args` is 'localhost' or '127.0.0.1',
    it is overwritten in place with the `host_container_ip` value from
    the `SSP` config section (`None` if that value is not configured).
    Any other servo host is left untouched.

    @param servo_args  Dictionary of servo parameters; must contain the
                       servo host attribute.  Modified in place.
    """
    if servo_args[SERVO_HOST_ATTR] not in ('localhost', '127.0.0.1'):
        return
    servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
            'SSP', 'host_container_ip', type=str, default=None)
767
768
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    A `ServoHost` for the servo attached to `dut` is created and then
    checked with either `verify()` or `repair()`, depending on the
    parameters:
      * A non-`None` `servo_args` means the caller depends on servo:
        the host is created and checked with `repair()`.
      * Otherwise, when `try_lab_servo` is true and servo parameters
        can be found for the DUT:
          * If `try_servo_repair` is true, the host is created and
            checked with `repair()`.
          * Otherwise the host is created only if the servo host
            responds to `ping`, and is checked with `verify()`.

    When `servo_args` was not `None`, repair failures are raised to the
    caller, and `repair()` is run with `silent=True` so that it does not
    write to `status.log` and pollute test results.  That situation
    arises when we're called from a test (not special task) control
    file with an explicit dependency on servo.  In every other case,
    exceptions from the check are logged and discarded, and the host is
    still returned.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Source of the servo parameters, as implemented here:
      * Servo parameters found for `dut` replace whatever was passed in
        `servo_args`.
      * If none are found for `dut`, the `servo_args` dict passed in by
        the caller is used as is.

    @param dut                An instance of `Host` from which to take
                              servo parameters (if available).
    @param servo_args         A dictionary with servo parameters to use
                              if they can't be found from `dut`.  If
                              this argument is supplied, exceptions
                              from `repair()` will be passed back to
                              the caller.
    @param try_lab_servo      If not true, servo host creation will be
                              skipped unless otherwise required by the
                              caller.
    @param try_servo_repair   If true, check a servo host with
                              `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_dependency = servo_args is not None

    if dut is not None and (try_lab_servo or servo_dependency):
        host_derived_args = get_servo_args_for_host(dut)
        if host_derived_args is not None:
            if utils.in_moblab_ssp():
                _tweak_args_for_ssp_moblab(host_derived_args)
            logging.debug(
                    'Overriding provided servo_args (%s) with arguments'
                    ' determined from the host (%s)',
                    servo_args,
                    host_derived_args,
            )
            servo_args = host_derived_args

    if servo_args is None:
        logging.debug('No servo_args provided, and failed to find overrides.')
        return None
    if SERVO_HOST_ATTR not in servo_args:
        logging.debug('%s attribute missing from servo_args: %s',
                      SERVO_HOST_ATTR, servo_args)
        return None

    servo_hostname = servo_args[SERVO_HOST_ATTR]

    # The ping check only applies when nothing obliges us to create the
    # host: no caller dependency on servo and no repair request.
    ping_required = not (servo_dependency or try_servo_repair)
    if ping_required and not servo_host_is_up(servo_hostname):
        logging.debug('ServoHost is not up.')
        return None

    # servo_args is known to hold the host attribute at this point, so
    # it is non-empty and lab membership depends only on the hostname.
    newhost = ServoHost(
            is_in_lab=server_utils.host_in_lab(servo_hostname),
            **servo_args
    )

    # repair() covers everything verify() does, so exactly one of the
    # two is called below; running both is never necessary.
    if servo_dependency:
        newhost.repair(silent=True)
        return newhost

    if try_servo_repair:
        check_host = newhost.repair
        failure_message = 'servo repair failed for %s'
    else:
        check_host = newhost.verify
        failure_message = 'servo verify failed for %s'
    try:
        check_host()
    except Exception:
        logging.exception(failure_message, newhost.hostname)
    return newhost