blob: c50bb729e4852b6e1d5a049c36f90a2acf989bdb [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
Fang Deng5d518f42013-08-02 14:04:32 -070015import xmlrpclib
Raul E Rangel52ca2e82018-07-03 14:10:14 -060016import os
Fang Deng5d518f42013-08-02 14:04:32 -070017
18from autotest_lib.client.bin import utils
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070019from autotest_lib.client.common_lib import control_data
Fang Deng5d518f42013-08-02 14:04:32 -070020from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080021from autotest_lib.client.common_lib import global_config
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070022from autotest_lib.client.common_lib import host_states
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Dan Shi0942b1d2015-03-31 11:07:00 -070024from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080025from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070026from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070027from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000028from autotest_lib.client.cros import constants as client_constants
Richard Barnettee519dcd2016-08-15 17:37:17 -070029from autotest_lib.server import afe_utils
beeps5e8c45a2013-12-17 22:05:11 -080030from autotest_lib.server import site_utils as server_site_utils
Richard Barnetted31580e2018-05-14 19:58:00 +000031from autotest_lib.server.cros import autoupdater
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080032from autotest_lib.server.cros import dnsname_mangler
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070033from autotest_lib.server.cros.dynamic_suite import control_file_getter
Richard Barnetted31580e2018-05-14 19:58:00 +000034from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Richard Barnette9a26ad62016-06-10 12:03:08 -070035from autotest_lib.server.cros.servo import servo
Prathmesh Prabhuc2c6d542018-04-20 14:28:45 -070036from autotest_lib.server.hosts import base_classes
Richard Barnetted31580e2018-05-14 19:58:00 +000037from autotest_lib.server.hosts import servo_repair
Fang Deng5d518f42013-08-02 14:04:32 -070038from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070039from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070040
Dan Shi5e2efb72017-02-07 11:40:23 -080041try:
42 from chromite.lib import metrics
43except ImportError:
44 metrics = utils.metrics_mock
45
Fang Deng5d518f42013-08-02 14:04:32 -070046
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
# Presumably the matching attribute names for the servo's board and serial
# number; they are not referenced in this part of the file -- TODO confirm.
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
# When true, get_servod_server_proxy() reaches servod on non-local hosts
# through rpc_server_tracker instead of a direct http connection.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Root directory handed to the control file getter when a servo host reboot
# job is scheduled.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs (and control files) used for synchronized servo host
# reboots: the regular one and the forced variant.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
Fang Deng5d518f42013-08-02 14:04:32 -070064
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Default servod port: the SERVOD_PORT environment variable when it is
    # set, otherwise 9999.  Evaluated once, when the class is defined.
    DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 60

    # Name passed as `ready_test_name` when an XMLRPC connection to servod
    # is set up through rpc_server_tracker.
    SERVO_READY_METHOD = 'get_version'

    # Shell command handed to the parent reboot(): a plain `reboot` is
    # started in the background after 1 second, and `reboot -f` follows
    # 10 seconds later.
    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
77
Fang Deng5d518f42013-08-02 14:04:32 -070078
Richard Barnette17bfc6c2016-08-04 18:41:43 -070079 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070080 servo_port=DEFAULT_PORT, servo_board=None,
Kevin Cheng643ce8a2016-09-15 15:42:12 -070081 servo_serial=None, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070082 """Initialize a ServoHost instance.
83
84 A ServoHost instance represents a host that controls a servo.
85
86 @param servo_host: Name of the host where the servod process
87 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -060088 @param servo_port: Port the servod process is listening on. Defaults
89 to the SERVOD_PORT environment variable if set,
90 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070091 @param servo_board: Board that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -080092 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
93 to None, for which utils.host_is_in_lab_zone will be
94 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -070095
96 """
97 super(ServoHost, self)._initialize(hostname=servo_host,
98 *args, **dargs)
Richard Barnettee519dcd2016-08-15 17:37:17 -070099 self.servo_port = servo_port
100 self.servo_board = servo_board
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700101 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -0700102 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -0700103 self._repair_strategy = (
104 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700105 self._is_localhost = (self.hostname == 'localhost')
106 if self._is_localhost:
107 self._is_in_lab = False
108 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800109 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
110 else:
111 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800112
Richard Barnettee519dcd2016-08-15 17:37:17 -0700113 # Commands on the servo host must be run by the superuser.
114 # Our account on a remote host is root, but if our target is
115 # localhost then we might be running unprivileged. If so,
116 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700117 if self._is_localhost:
118 self._sudo_required = utils.system_output('id -u') != '0'
119 else:
120 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700121
Richard Barnette9a26ad62016-06-10 12:03:08 -0700122
123 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700124 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700125
126 Initializes `self._servo` and then verifies that all network
127 connections are working. This will create an ssh tunnel if
128 it's required.
129
130 As a side effect of testing the connection, all signals on the
131 target servo are reset to default values, and the USB stick is
132 set to the neutral (off) position.
133 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700134 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700135 timeout, _ = retry.timeout(
136 servo_obj.initialize_dut,
137 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
138 if timeout:
139 raise hosts.AutoservVerifyError(
140 'Servo initialize timed out.')
141 self._servo = servo_obj
142
143
144 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700145 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700146
147 If we've previously successfully connected to our servo,
148 disconnect any established ssh tunnel, and set `self._servo`
149 back to `None`.
150 """
151 if self._servo:
152 # N.B. This call is safe even without a tunnel:
153 # rpc_server_tracker.disconnect() silently ignores
154 # unknown ports.
155 self.rpc_server_tracker.disconnect(self.servo_port)
156 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700157
158
159 def is_in_lab(self):
160 """Check whether the servo host is a lab device.
161
162 @returns: True if the servo host is in Cros Lab, otherwise False.
163
164 """
165 return self._is_in_lab
166
167
168 def is_localhost(self):
169 """Checks whether the servo host points to localhost.
170
171 @returns: True if it points to localhost, otherwise False.
172
173 """
174 return self._is_localhost
175
176
177 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700178 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700179
180 @returns: An xmlrpclib.ServerProxy that is connected to the servod
181 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700182 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700183 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
184 return self.rpc_server_tracker.xmlrpc_connect(
185 None, self.servo_port,
186 ready_test_name=self.SERVO_READY_METHOD,
187 timeout_seconds=60)
188 else:
189 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
190 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700191
192
Richard Barnette9a26ad62016-06-10 12:03:08 -0700193 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800194 """Check if a servo host is running chromeos.
195
196 @return: True if the servo host is running chromeos.
197 False if it isn't, or we don't have enough information.
198 """
199 try:
200 result = self.run('grep -q CHROMEOS /etc/lsb-release',
201 ignore_status=True, timeout=10)
202 except (error.AutoservRunError, error.AutoservSSHTimeout):
203 return False
204 return result.exit_status == 0
205
206
Fang Deng5d518f42013-08-02 14:04:32 -0700207 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
Dean Liaoe3e75f62017-11-14 10:36:43 +0800208 connect_timeout=None, alive_interval=None,
209 alive_count_max=None, connection_attempts=None):
Fang Deng5d518f42013-08-02 14:04:32 -0700210 """Override default make_ssh_command to use tuned options.
211
212 Tuning changes:
213 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
214 connection failure. Consistency with remote_access.py.
215
216 - ServerAliveInterval=180; which causes SSH to ping connection every
217 180 seconds. In conjunction with ServerAliveCountMax ensures
218 that if the connection dies, Autotest will bail out quickly.
219
220 - ServerAliveCountMax=3; consistency with remote_access.py.
221
222 - ConnectAttempts=4; reduce flakiness in connection errors;
223 consistency with remote_access.py.
224
225 - UserKnownHostsFile=/dev/null; we don't care about the keys.
226
227 - SSH protocol forced to 2; needed for ServerAliveInterval.
228
229 @param user User name to use for the ssh connection.
230 @param port Port on the target host to use for ssh connection.
231 @param opts Additional options to the ssh command.
232 @param hosts_file Ignored.
233 @param connect_timeout Ignored.
234 @param alive_interval Ignored.
Dean Liaoe3e75f62017-11-14 10:36:43 +0800235 @param alive_count_max Ignored.
236 @param connection_attempts Ignored.
Fang Deng5d518f42013-08-02 14:04:32 -0700237
238 @returns: An ssh command with the requested settings.
239
240 """
Dean Liaoe3e75f62017-11-14 10:36:43 +0800241 options = ' '.join([opts, '-o Protocol=2'])
242 return super(ServoHost, self).make_ssh_command(
243 user=user, port=port, opts=options, hosts_file='/dev/null',
244 connect_timeout=30, alive_interval=180, alive_count_max=3,
245 connection_attempts=4)
Fang Deng5d518f42013-08-02 14:04:32 -0700246
247
248 def _make_scp_cmd(self, sources, dest):
249 """Format scp command.
250
251 Given a list of source paths and a destination path, produces the
252 appropriate scp command for encoding it. Remote paths must be
253 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
254 to allow additional ssh options.
255
256 @param sources: A list of source paths to copy from.
257 @param dest: Destination path to copy to.
258
259 @returns: An scp command that copies |sources| on local machine to
260 |dest| on the remote servo host.
261
262 """
263 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
264 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
265 return command % (self.master_ssh_option,
266 self.port, ' '.join(sources), dest)
267
268
269 def run(self, command, timeout=3600, ignore_status=False,
270 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800271 connect_timeout=30, ssh_failure_retry_ok=False,
272 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700273 """Run a command on the servo host.
274
275 Extends method `run` in SSHHost. If the servo host is a remote device,
276 it will call `run` in SSHost without changing anything.
277 If the servo host is 'localhost', it will call utils.system_output.
278
279 @param command: The command line string.
280 @param timeout: Time limit in seconds before attempting to
281 kill the running process. The run() function
282 will take a few seconds longer than 'timeout'
283 to complete if it has to kill the process.
284 @param ignore_status: Do not raise an exception, no matter
285 what the exit code of the command is.
286 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
287 @param connect_timeout: SSH connection timeout (in seconds)
288 Ignored if host is 'localhost'.
289 @param options: String with additional ssh command options
290 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800291 @param ssh_failure_retry_ok: when True and ssh connection failure is
292 suspected, OK to retry command (but not
293 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700294 @param stdin: Stdin to pass (a string) to the executed command.
295 @param verbose: Log the commands.
296 @param args: Sequence of strings to pass as arguments to command by
297 quoting them in " and escaping their contents if necessary.
298
299 @returns: A utils.CmdResult object.
300
301 @raises AutoservRunError if the command failed.
302 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
303 when servo host is not 'localhost'.
304
305 """
306 run_args = {'command': command, 'timeout': timeout,
307 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
308 'stderr_tee': stderr_tee, 'stdin': stdin,
309 'verbose': verbose, 'args': args}
310 if self.is_localhost():
311 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800312 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
313 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700314 try:
315 return utils.run(**run_args)
316 except error.CmdError as e:
317 logging.error(e)
318 raise error.AutoservRunError('command execution error',
319 e.result_obj)
320 else:
321 run_args['connect_timeout'] = connect_timeout
322 run_args['options'] = options
323 return super(ServoHost, self).run(**run_args)
324
325
Richard Barnette9a26ad62016-06-10 12:03:08 -0700326 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700327 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
328
329 @returns The version string in lsb-release, under attribute
330 CHROMEOS_RELEASE_VERSION.
331 """
332 lsb_release_content = self.run(
333 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
334 return lsbrelease_utils.get_chromeos_release_version(
335 lsb_release_content=lsb_release_content)
336
337
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700338 def get_attached_duts(self, afe):
339 """Gather a list of duts that use this servo host.
340
341 @param afe: afe instance.
342
343 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700344 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700345 return afe.get_hosts_by_attribute(
346 attribute=SERVO_HOST_ATTR, value=self.hostname)
347
348
349 def get_board(self):
350 """Determine the board for this servo host.
351
352 @returns a string representing this servo host's board.
353 """
354 return lsbrelease_utils.get_current_board(
355 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
356
357
358 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
359 """Choose which dut to schedule servo host reboot job.
360
361 We'll want a semi-deterministic way of selecting which host should be
362 scheduled for the servo host reboot job. For now we'll sort the
363 list with the expectation the dut list will stay consistent.
364 From there we'll grab the first dut that is available so we
365 don't schedule a job on a dut that will never run.
366
367 @param dut_list: List of the dut hostnames to choose from.
368 @param afe: Instance of the AFE.
369
370 @return hostname of dut to schedule job on.
371 """
372 afe_hosts = afe.get_hosts(dut_list)
373 afe_hosts.sort()
374 for afe_host in afe_hosts:
375 if afe_host.status not in host_states.UNAVAILABLE_STATES:
376 return afe_host.hostname
377 # If they're all unavailable, just return the first sorted dut.
378 dut_list.sort()
379 return dut_list[0]
380
381
382 def _sync_job_scheduled_for_duts(self, dut_list, afe):
383 """Checks if a synchronized reboot has been scheduled for these duts.
384
385 Grab all the host queue entries that aren't completed for the duts and
386 see if any of them have the expected job name.
387
388 @param dut_list: List of duts to check on.
389 @param afe: Instance of the AFE.
390
391 @returns True if the job is scheduled, False otherwise.
392 """
393 afe_hosts = afe.get_hosts(dut_list)
394 for afe_host in afe_hosts:
395 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
396 for hqe in hqes:
397 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700398 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
399 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700400 return True
401 return False
402
403
Kevin Cheng55265902016-10-19 12:46:50 -0700404 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700405 """Schedule a job to reboot the servo host.
406
407 When we schedule a job, it will create a ServoHost object which will
408 go through this entire flow of checking if a reboot is needed and
409 trying to schedule it. There is probably a better approach to setting
410 up a synchronized reboot but I'm coming up short on better ideas so I
411 apologize for this circus show.
412
Kevin Cheng55265902016-10-19 12:46:50 -0700413 @param dut_list: List of duts that need to be locked.
414 @param afe: Instance of afe.
415 @param force_reboot: Boolean to indicate if a forced reboot should be
416 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700417 """
418 # If we've already scheduled job on a dut, we're done here.
419 if self._sync_job_scheduled_for_duts(dut_list, afe):
420 return
421
422 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700423 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
424 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700425 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
426 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700427 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700428 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700429 try:
430 afe.create_job(control_file=control_file, name=test,
431 control_type=control_type, hosts=[dut])
432 except Exception as e:
433 # Sometimes creating the job will raise an exception. We'll log it
434 # but we don't want to fail because of it.
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700435 logging.exception('Scheduling reboot job failed due to Exception.')
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700436
437
438 def reboot(self, *args, **dargs):
439 """Reboot using special servo host reboot command."""
440 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
441 *args, **dargs)
442
443
444 def _check_for_reboot(self, updater):
445 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700446
447 If the host has successfully downloaded and finalized a new
448 build, reboot.
449
450 @param updater: a ChromiumOSUpdater instance for checking
451 whether reboot is needed.
452 @return Return a (status, build) tuple reflecting the
453 update_engine status and current build of the host
454 at the end of the call.
455 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700456 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700457 status = updater.check_update_status()
458 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700459 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700460 afe = frontend_wrappers.RetryingAFE(
461 timeout_min=5, delay_sec=10,
462 server=server_site_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700463 dut_list = self.get_attached_duts(afe)
464 logging.info('servo host has the following duts: %s', dut_list)
465 if len(dut_list) > 1:
466 logging.info('servo host has multiple duts, scheduling '
467 'synchronized reboot')
468 self.schedule_synchronized_reboot(dut_list, afe)
469 return status, current_build_number
470
471 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700472 self.hostname, current_build_number)
473 # Tell the reboot() call not to wait for completion.
474 # Otherwise, the call will log reboot failure if servo does
475 # not come back. The logged reboot failure will lead to
476 # test job failure. If the test does not require servo, we
477 # don't want servo failure to fail the test with error:
478 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700479 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700480
481 # We told the reboot() call not to wait, but we need to wait
482 # for the reboot before we continue. Alas. The code from
483 # here below is basically a copy of Host.wait_for_restart(),
484 # with the logging bits ripped out, so that they can't cause
485 # the failure logging problem described above.
486 #
487 # The black stain that this has left on my soul can never be
488 # erased.
489 old_boot_id = self.get_boot_id()
490 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
491 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
492 old_boot_id=old_boot_id):
493 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700494 'servo host %s failed to shut down.' %
495 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700496 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700497 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700498 status = updater.check_update_status()
499 logging.info('servo host %s back from reboot, with build %s',
500 self.hostname, current_build_number)
501 else:
502 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700503 'servo host %s failed to come back from reboot.' %
504 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700505 return status, current_build_number
506
507
Richard Barnette3a7697f2016-04-20 11:33:27 -0700508 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800509 """Update the image on the servo host, if needed.
510
J. Richard Barnette84895392015-04-30 12:31:01 -0700511 This method recognizes the following cases:
512 * If the Host is not running Chrome OS, do nothing.
513 * If a previously triggered update is now complete, reboot
514 to the new version.
515 * If the host is processing a previously triggered update,
516 do nothing.
517 * If the host is running a version of Chrome OS different
518 from the default for servo Hosts, trigger an update, but
519 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800520
Richard Barnette3a7697f2016-04-20 11:33:27 -0700521 @param wait_for_update If an update needs to be applied and
522 this is true, then don't return until the update is
523 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800524 @raises dev_server.DevServerException: If all the devservers are down.
525 @raises site_utils.ParseBuildNameException: If the devserver returns
526 an invalid build name.
beeps5e8c45a2013-12-17 22:05:11 -0800527 @raises AutoservRunError: If the update_engine_client isn't present on
528 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700529
beeps5e8c45a2013-12-17 22:05:11 -0800530 """
Dan Shib795b5a2015-09-24 13:26:35 -0700531 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700532 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800533 logging.info('Not attempting an update, either %s is not running '
534 'chromeos or we cannot find enough information about '
535 'the host.', self.hostname)
536 return
537
Dan Shib795b5a2015-09-24 13:26:35 -0700538 if lsbrelease_utils.is_moblab():
539 logging.info('Not attempting an update, %s is running moblab.',
540 self.hostname)
541 return
542
Richard Barnette383ef9c2016-12-13 11:56:49 -0800543 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
J. Richard Barnette84895392015-04-30 12:31:01 -0700544 target_build_number = server_site_utils.ParseBuildName(
545 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800546 # For servo image staging, we want it as more widely distributed as
547 # possible, so that devservers' load can be evenly distributed. So use
548 # hostname instead of target_build as hash.
549 ds = dev_server.ImageServer.resolve(self.hostname,
550 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700551 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800552
553 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700554 status, current_build_number = self._check_for_reboot(updater)
555 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800556 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
557 logging.info('servo host %s already processing an update, update '
558 'engine client status=%s', self.hostname, status)
Allen Li66aa2542017-06-26 15:26:27 -0700559 elif status == autoupdater.UPDATER_NEED_REBOOT:
560 return
J. Richard Barnette84895392015-04-30 12:31:01 -0700561 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800562 logging.info('Using devserver url: %s to trigger update on '
563 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700564 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800565 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700566 ds.stage_artifacts(target_build,
567 artifacts=['full_payload'])
568 except Exception as e:
569 logging.error('Staging artifacts failed: %s', str(e))
570 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800571 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700572 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700573 # TODO(jrbarnette): This 'touch' is a gross hack
574 # to get us past crbug.com/613603. Once that
575 # bug is resolved, we should remove this code.
576 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700577 updater.trigger_update()
578 except autoupdater.RootFSUpdateError as e:
579 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800580 metrics.Counter('chromeos/autotest/servo/'
581 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700582 else:
583 trigger_download_status = 'passed'
584 logging.info('Triggered download and update %s for %s, '
585 'update engine currently in status %s',
586 trigger_download_status, self.hostname,
587 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800588 else:
589 logging.info('servo host %s does not require an update.',
590 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700591 update_pending = False
592
593 if update_pending and wait_for_update:
594 logging.info('Waiting for servo update to complete.')
595 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800596
597
Richard Barnette1edbb162016-11-01 11:47:50 -0700598 def verify(self, silent=False):
599 """Update the servo host and verify it's in a good state.
600
601 @param silent If true, suppress logging in `status.log`.
602 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700603 # TODO(jrbarnette) Old versions of beaglebone_servo include
Richard Barnette9a26ad62016-06-10 12:03:08 -0700604 # the powerd package. If you touch the .oobe_completed file
605 # (as we do to work around an update_engine problem), then
606 # powerd will eventually shut down the beaglebone for lack
607 # of (apparent) activity. Current versions of
Richard Barnette79d78c42016-05-25 09:31:21 -0700608 # beaglebone_servo don't have powerd, but until we can purge
609 # the lab of the old images, we need to make sure powerd
610 # isn't running.
611 self.run('stop powerd', ignore_status=True)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700612 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700613 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700614 except:
615 self.disconnect_servo()
616 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700617
618
Richard Barnette1edbb162016-11-01 11:47:50 -0700619 def repair(self, silent=False):
620 """Attempt to repair servo host.
621
622 @param silent If true, suppress logging in `status.log`.
623 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700624 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700625 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700626 except:
627 self.disconnect_servo()
628 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700629
630
Fang Dengd4fe7392013-09-20 12:18:21 -0700631 def has_power(self):
632 """Return whether or not the servo host is powered by PoE."""
633 # TODO(fdeng): See crbug.com/302791
634 # For now, assume all servo hosts in the lab have power.
635 return self.is_in_lab()
636
637
def power_cycle(self):
    """Power cycle this servo host via PoE when `has_power()` is true.

    When `has_power()` is false the host is left alone and only an
    informational message is logged.

    @raises AutoservRepairError if the power cycle request fails.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Fold the failure into the repair error type, keeping the
        # original error text in the message.
        raise hosts.AutoservRepairError(
                'Power cycling %s failed: %s' % (self.hostname, e))
655
656
def get_servo(self):
    """Return the servo.Servo object cached on this host.

    @return: the value stored in `self._servo`, unchanged.
    """
    cached_servo = self._servo
    return cached_servo
663
664
def close(self):
    """Dump UART logs, close the servo, and close the host object.

    When a servo object is attached, its UART streams are dumped into
    the job's result directory (only if a job is set) and the servo is
    closed.  The parent class's `close()` is then invoked.

    Each teardown step runs even if an earlier one raised, so a
    failure while dumping logs cannot leave the servo or the host
    unclosed.  Exceptions still propagate to the caller.
    """
    try:
        if self._servo:
            try:
                # In some cases when we run as lab-tools, the job
                # object is None.
                if self.job:
                    self._servo.dump_uart_streams(self.job.resultdir)
            finally:
                self._servo.close()
    finally:
        super(ServoHost, self).close()
674
675
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label
    of the name; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
687
688
def servo_host_is_up(servo_hostname):
    """Ping a servo host and report whether it answered.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False
            otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    ping_options = {
        'count': 3,
        'ignore_result': True,
        'ignore_status': True,
    }
    config = ping_runner.PingConfig(servo_hostname, **ping_options)
    result = ping_runner.PingRunner().ping(config)
    return result.received > 0
706
707
Richard Barnettee519dcd2016-08-15 17:37:17 -0700708def _map_afe_board_to_servo_board(afe_board):
709 """Map a board we get from the AFE to a servo appropriate value.
710
711 Many boards are identical to other boards for servo's purposes.
712 This function makes that mapping.
713
714 @param afe_board string board name received from AFE.
715 @return board we expect servo to have.
716
717 """
718 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
719 BOARD_MAP = {'gizmo': 'panther'}
720 mapped_board = afe_board
721 if afe_board in BOARD_MAP:
722 mapped_board = BOARD_MAP[afe_board]
723 else:
724 for suffix in KNOWN_SUFFIXES:
725 if afe_board.endswith(suffix):
726 mapped_board = afe_board[0:-len(suffix)]
727 break
728 if mapped_board != afe_board:
729 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
730 return mapped_board
731
732
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the attributes name a servo host but carry a port that cannot be
    converted to an int, the error is logged and no servo args are
    returned (the serial attribute is then ignored as well).

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an option port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.  `servo_args` is
            `None` when no usable servo data was found.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        # Inside a container, moblab status comes from the SSP config
        # section rather than from utils.is_moblab().
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            # A loopback servo host on SSP moblab is replaced by the
            # configured host container IP.
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except (TypeError, ValueError):
                # int() raises TypeError for values such as None and
                # ValueError for non-numeric strings.
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have just been discarded because of a bad
        # port; storing the serial into None would raise TypeError.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
791
792
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Build a ServoHost for a DUT when one is wanted and available.

    Whether a servo host is created, and how it is checked, depends on
    the arguments:
      * A non-`None` `servo_args` means the caller depends on servo: a
        servo host must be created and is checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * with `try_servo_repair` true, a servo host is created and
            checked with `repair()`;
          * without it, a servo host is created and checked with
            `verify()` only if the servo answers `ping`.

    When `servo_args` was not `None`, repair failures propagate to the
    caller.  This happens only when called from a test (not special
    task) control file with an explicit servo dependency; repair is
    then run silently so that it does not write to `status.log` and
    pollute test results.  In every other case failures are logged and
    discarded, and the servo host is still returned.

    TODO(jrbarnette):  The special handling for servo in test control
    files should eventually be removed.

    The servo host name, port number and DUT board are taken from the
    first of these sources that provides them:
      * Servo attributes from the `dut` parameter, which take
        precedence over everything else.
      * A DNS entry for the servo, derived from the DUT hostname, in
        the CrOS lab network; it is used with the default port and the
        DUT's board.
      * The `servo_args` dict passed in by the caller.

    @param dut                An instance of `Host` from which to take
                              servo parameters (if available).
    @param servo_args         A dictionary with servo parameters to use
                              if they can't be found from `dut`.  If
                              this argument is supplied, failures from
                              `repair()` will be passed back to the
                              caller.
    @param try_lab_servo      If not true, servo host creation will be
                              skipped unless otherwise required by the
                              caller.
    @param try_servo_repair   If true, check a servo host with
                              `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    caller_requires_servo = servo_args is not None
    in_lab = False
    if dut is not None and (try_lab_servo or caller_requires_servo):
        dut_servo_args, in_lab = _get_standard_servo_args(dut)
        if dut_servo_args is not None:
            servo_args = dut_servo_args

    if servo_args is None:
        return None

    # The ping is only attempted when neither a servo dependency nor a
    # repair attempt was requested.
    if not (caller_requires_servo or try_servo_repair
            or servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None

    new_servo_host = ServoHost(is_in_lab=in_lab, **servo_args)
    base_classes.send_creation_metric(new_servo_host)

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if caller_requires_servo:
        new_servo_host.repair(silent=True)
        return new_servo_host

    try:
        if try_servo_repair:
            new_servo_host.repair()
        else:
            new_servo_host.verify()
    except Exception:
        if try_servo_repair:
            operation = 'repair'
        else:
            operation = 'verification'
        logging.exception('Servo %s failed for %s',
                          operation, new_servo_host.hostname)
    return new_servo_host