# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""


import httplib
import logging
import os
import socket
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.cros import constants as client_constants
from autotest_lib.server import afe_utils
from autotest_lib.server import site_utils as server_utils
from autotest_lib.server.cros import autoupdater
from autotest_lib.server.cros import dnsname_mangler
from autotest_lib.server.cros.dynamic_suite import control_file_getter
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import servo_repair
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


# Names of the host attributes in the database that describe the servo
# connected to a DUT: the servo host and port, plus the board and serial.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'
# All of the servo-related host attribute names, as one tuple.
SERVO_ATTR_KEYS = (
        SERVO_BOARD_ATTR,
        SERVO_HOST_ATTR,
        SERVO_PORT_ATTR,
        SERVO_SERIAL_ATTR,
)

_CONFIG = global_config.global_config
# When true, servod is reached through an ssh tunnel for any servo host
# other than localhost (see ServoHost.get_servod_server_proxy).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory searched for the control files of the servo host reboot tests.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs used to reboot a servo host shared by several DUTs.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'

Fang Deng5d518f42013-08-02 14:04:32 -070070class ServoHost(ssh_host.SSHHost):
71 """Host class for a host that controls a servo, e.g. beaglebone."""
72
Raul E Rangel52ca2e82018-07-03 14:10:14 -060073 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070074
Dan Shie5b3c512014-08-21 12:12:09 -070075 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070076 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070077
xixuan6cf6d2f2016-01-29 15:29:00 -080078 # Ready test function
79 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070080
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070081 REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
82
Fang Deng5d518f42013-08-02 14:04:32 -070083
Richard Barnette17bfc6c2016-08-04 18:41:43 -070084 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070085 servo_port=DEFAULT_PORT, servo_board=None,
Kevin Cheng643ce8a2016-09-15 15:42:12 -070086 servo_serial=None, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070087 """Initialize a ServoHost instance.
88
89 A ServoHost instance represents a host that controls a servo.
90
91 @param servo_host: Name of the host where the servod process
92 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -060093 @param servo_port: Port the servod process is listening on. Defaults
94 to the SERVOD_PORT environment variable if set,
95 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070096 @param servo_board: Board that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -080097 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
98 to None, for which utils.host_is_in_lab_zone will be
99 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700100
101 """
102 super(ServoHost, self)._initialize(hostname=servo_host,
103 *args, **dargs)
Richard Barnette42f4db92018-08-23 15:05:15 -0700104 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700105 self.servo_board = servo_board
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700106 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -0700107 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -0700108 self._repair_strategy = (
109 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700110 self._is_localhost = (self.hostname == 'localhost')
111 if self._is_localhost:
112 self._is_in_lab = False
113 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800114 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
115 else:
116 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800117
Richard Barnettee519dcd2016-08-15 17:37:17 -0700118 # Commands on the servo host must be run by the superuser.
119 # Our account on a remote host is root, but if our target is
120 # localhost then we might be running unprivileged. If so,
121 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700122 if self._is_localhost:
123 self._sudo_required = utils.system_output('id -u') != '0'
124 else:
125 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700126
Richard Barnette9a26ad62016-06-10 12:03:08 -0700127
128 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700129 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700130
131 Initializes `self._servo` and then verifies that all network
132 connections are working. This will create an ssh tunnel if
133 it's required.
134
135 As a side effect of testing the connection, all signals on the
136 target servo are reset to default values, and the USB stick is
137 set to the neutral (off) position.
138 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700139 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700140 timeout, _ = retry.timeout(
141 servo_obj.initialize_dut,
142 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
143 if timeout:
144 raise hosts.AutoservVerifyError(
145 'Servo initialize timed out.')
146 self._servo = servo_obj
147
148
149 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700150 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700151
152 If we've previously successfully connected to our servo,
153 disconnect any established ssh tunnel, and set `self._servo`
154 back to `None`.
155 """
156 if self._servo:
157 # N.B. This call is safe even without a tunnel:
158 # rpc_server_tracker.disconnect() silently ignores
159 # unknown ports.
160 self.rpc_server_tracker.disconnect(self.servo_port)
161 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700162
163
164 def is_in_lab(self):
165 """Check whether the servo host is a lab device.
166
167 @returns: True if the servo host is in Cros Lab, otherwise False.
168
169 """
170 return self._is_in_lab
171
172
173 def is_localhost(self):
174 """Checks whether the servo host points to localhost.
175
176 @returns: True if it points to localhost, otherwise False.
177
178 """
179 return self._is_localhost
180
181
182 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700183 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700184
185 @returns: An xmlrpclib.ServerProxy that is connected to the servod
186 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700187 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700188 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
189 return self.rpc_server_tracker.xmlrpc_connect(
190 None, self.servo_port,
191 ready_test_name=self.SERVO_READY_METHOD,
192 timeout_seconds=60)
193 else:
194 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
195 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700196
197
Richard Barnette9a26ad62016-06-10 12:03:08 -0700198 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800199 """Check if a servo host is running chromeos.
200
201 @return: True if the servo host is running chromeos.
202 False if it isn't, or we don't have enough information.
203 """
204 try:
205 result = self.run('grep -q CHROMEOS /etc/lsb-release',
206 ignore_status=True, timeout=10)
207 except (error.AutoservRunError, error.AutoservSSHTimeout):
208 return False
209 return result.exit_status == 0
210
211
Fang Deng5d518f42013-08-02 14:04:32 -0700212 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
Dean Liaoe3e75f62017-11-14 10:36:43 +0800213 connect_timeout=None, alive_interval=None,
214 alive_count_max=None, connection_attempts=None):
Fang Deng5d518f42013-08-02 14:04:32 -0700215 """Override default make_ssh_command to use tuned options.
216
217 Tuning changes:
218 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
219 connection failure. Consistency with remote_access.py.
220
221 - ServerAliveInterval=180; which causes SSH to ping connection every
222 180 seconds. In conjunction with ServerAliveCountMax ensures
223 that if the connection dies, Autotest will bail out quickly.
224
225 - ServerAliveCountMax=3; consistency with remote_access.py.
226
227 - ConnectAttempts=4; reduce flakiness in connection errors;
228 consistency with remote_access.py.
229
230 - UserKnownHostsFile=/dev/null; we don't care about the keys.
231
232 - SSH protocol forced to 2; needed for ServerAliveInterval.
233
234 @param user User name to use for the ssh connection.
235 @param port Port on the target host to use for ssh connection.
236 @param opts Additional options to the ssh command.
237 @param hosts_file Ignored.
238 @param connect_timeout Ignored.
239 @param alive_interval Ignored.
Dean Liaoe3e75f62017-11-14 10:36:43 +0800240 @param alive_count_max Ignored.
241 @param connection_attempts Ignored.
Fang Deng5d518f42013-08-02 14:04:32 -0700242
243 @returns: An ssh command with the requested settings.
244
245 """
Dean Liaoe3e75f62017-11-14 10:36:43 +0800246 options = ' '.join([opts, '-o Protocol=2'])
247 return super(ServoHost, self).make_ssh_command(
248 user=user, port=port, opts=options, hosts_file='/dev/null',
249 connect_timeout=30, alive_interval=180, alive_count_max=3,
250 connection_attempts=4)
Fang Deng5d518f42013-08-02 14:04:32 -0700251
252
253 def _make_scp_cmd(self, sources, dest):
254 """Format scp command.
255
256 Given a list of source paths and a destination path, produces the
257 appropriate scp command for encoding it. Remote paths must be
258 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
259 to allow additional ssh options.
260
261 @param sources: A list of source paths to copy from.
262 @param dest: Destination path to copy to.
263
264 @returns: An scp command that copies |sources| on local machine to
265 |dest| on the remote servo host.
266
267 """
268 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
269 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
270 return command % (self.master_ssh_option,
271 self.port, ' '.join(sources), dest)
272
273
274 def run(self, command, timeout=3600, ignore_status=False,
275 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800276 connect_timeout=30, ssh_failure_retry_ok=False,
277 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700278 """Run a command on the servo host.
279
280 Extends method `run` in SSHHost. If the servo host is a remote device,
281 it will call `run` in SSHost without changing anything.
282 If the servo host is 'localhost', it will call utils.system_output.
283
284 @param command: The command line string.
285 @param timeout: Time limit in seconds before attempting to
286 kill the running process. The run() function
287 will take a few seconds longer than 'timeout'
288 to complete if it has to kill the process.
289 @param ignore_status: Do not raise an exception, no matter
290 what the exit code of the command is.
291 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
292 @param connect_timeout: SSH connection timeout (in seconds)
293 Ignored if host is 'localhost'.
294 @param options: String with additional ssh command options
295 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800296 @param ssh_failure_retry_ok: when True and ssh connection failure is
297 suspected, OK to retry command (but not
298 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700299 @param stdin: Stdin to pass (a string) to the executed command.
300 @param verbose: Log the commands.
301 @param args: Sequence of strings to pass as arguments to command by
302 quoting them in " and escaping their contents if necessary.
303
304 @returns: A utils.CmdResult object.
305
306 @raises AutoservRunError if the command failed.
307 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
308 when servo host is not 'localhost'.
309
310 """
311 run_args = {'command': command, 'timeout': timeout,
312 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
313 'stderr_tee': stderr_tee, 'stdin': stdin,
314 'verbose': verbose, 'args': args}
315 if self.is_localhost():
316 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800317 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
318 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700319 try:
320 return utils.run(**run_args)
321 except error.CmdError as e:
322 logging.error(e)
323 raise error.AutoservRunError('command execution error',
324 e.result_obj)
325 else:
326 run_args['connect_timeout'] = connect_timeout
327 run_args['options'] = options
328 return super(ServoHost, self).run(**run_args)
329
330
Richard Barnette9a26ad62016-06-10 12:03:08 -0700331 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700332 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
333
334 @returns The version string in lsb-release, under attribute
335 CHROMEOS_RELEASE_VERSION.
336 """
337 lsb_release_content = self.run(
338 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
339 return lsbrelease_utils.get_chromeos_release_version(
340 lsb_release_content=lsb_release_content)
341
342
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700343 def get_attached_duts(self, afe):
344 """Gather a list of duts that use this servo host.
345
346 @param afe: afe instance.
347
348 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700349 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700350 return afe.get_hosts_by_attribute(
351 attribute=SERVO_HOST_ATTR, value=self.hostname)
352
353
354 def get_board(self):
355 """Determine the board for this servo host.
356
357 @returns a string representing this servo host's board.
358 """
359 return lsbrelease_utils.get_current_board(
360 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
361
362
363 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
364 """Choose which dut to schedule servo host reboot job.
365
366 We'll want a semi-deterministic way of selecting which host should be
367 scheduled for the servo host reboot job. For now we'll sort the
368 list with the expectation the dut list will stay consistent.
369 From there we'll grab the first dut that is available so we
370 don't schedule a job on a dut that will never run.
371
372 @param dut_list: List of the dut hostnames to choose from.
373 @param afe: Instance of the AFE.
374
375 @return hostname of dut to schedule job on.
376 """
377 afe_hosts = afe.get_hosts(dut_list)
378 afe_hosts.sort()
379 for afe_host in afe_hosts:
380 if afe_host.status not in host_states.UNAVAILABLE_STATES:
381 return afe_host.hostname
382 # If they're all unavailable, just return the first sorted dut.
383 dut_list.sort()
384 return dut_list[0]
385
386
387 def _sync_job_scheduled_for_duts(self, dut_list, afe):
388 """Checks if a synchronized reboot has been scheduled for these duts.
389
390 Grab all the host queue entries that aren't completed for the duts and
391 see if any of them have the expected job name.
392
393 @param dut_list: List of duts to check on.
394 @param afe: Instance of the AFE.
395
396 @returns True if the job is scheduled, False otherwise.
397 """
398 afe_hosts = afe.get_hosts(dut_list)
399 for afe_host in afe_hosts:
400 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
401 for hqe in hqes:
402 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700403 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
404 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700405 return True
406 return False
407
408
Kevin Cheng55265902016-10-19 12:46:50 -0700409 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700410 """Schedule a job to reboot the servo host.
411
412 When we schedule a job, it will create a ServoHost object which will
413 go through this entire flow of checking if a reboot is needed and
414 trying to schedule it. There is probably a better approach to setting
415 up a synchronized reboot but I'm coming up short on better ideas so I
416 apologize for this circus show.
417
Kevin Cheng55265902016-10-19 12:46:50 -0700418 @param dut_list: List of duts that need to be locked.
419 @param afe: Instance of afe.
420 @param force_reboot: Boolean to indicate if a forced reboot should be
421 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700422 """
423 # If we've already scheduled job on a dut, we're done here.
424 if self._sync_job_scheduled_for_duts(dut_list, afe):
425 return
426
427 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700428 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
429 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700430 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
431 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700432 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700433 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700434 try:
435 afe.create_job(control_file=control_file, name=test,
436 control_type=control_type, hosts=[dut])
437 except Exception as e:
438 # Sometimes creating the job will raise an exception. We'll log it
439 # but we don't want to fail because of it.
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700440 logging.exception('Scheduling reboot job failed due to Exception.')
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700441
442
443 def reboot(self, *args, **dargs):
444 """Reboot using special servo host reboot command."""
445 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
446 *args, **dargs)
447
448
449 def _check_for_reboot(self, updater):
450 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700451
452 If the host has successfully downloaded and finalized a new
453 build, reboot.
454
455 @param updater: a ChromiumOSUpdater instance for checking
456 whether reboot is needed.
457 @return Return a (status, build) tuple reflecting the
458 update_engine status and current build of the host
459 at the end of the call.
460 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700461 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700462 status = updater.check_update_status()
463 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700464 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700465 afe = frontend_wrappers.RetryingAFE(
466 timeout_min=5, delay_sec=10,
Prathmesh Prabhucbd5ebb2018-08-28 17:04:50 -0700467 server=server_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700468 dut_list = self.get_attached_duts(afe)
469 logging.info('servo host has the following duts: %s', dut_list)
470 if len(dut_list) > 1:
471 logging.info('servo host has multiple duts, scheduling '
472 'synchronized reboot')
473 self.schedule_synchronized_reboot(dut_list, afe)
474 return status, current_build_number
475
476 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700477 self.hostname, current_build_number)
478 # Tell the reboot() call not to wait for completion.
479 # Otherwise, the call will log reboot failure if servo does
480 # not come back. The logged reboot failure will lead to
481 # test job failure. If the test does not require servo, we
482 # don't want servo failure to fail the test with error:
483 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700484 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700485
486 # We told the reboot() call not to wait, but we need to wait
487 # for the reboot before we continue. Alas. The code from
488 # here below is basically a copy of Host.wait_for_restart(),
489 # with the logging bits ripped out, so that they can't cause
490 # the failure logging problem described above.
491 #
492 # The black stain that this has left on my soul can never be
493 # erased.
494 old_boot_id = self.get_boot_id()
495 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
496 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
497 old_boot_id=old_boot_id):
498 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700499 'servo host %s failed to shut down.' %
500 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700501 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700502 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700503 status = updater.check_update_status()
504 logging.info('servo host %s back from reboot, with build %s',
505 self.hostname, current_build_number)
506 else:
507 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700508 'servo host %s failed to come back from reboot.' %
509 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700510 return status, current_build_number
511
512
Richard Barnette3a7697f2016-04-20 11:33:27 -0700513 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800514 """Update the image on the servo host, if needed.
515
J. Richard Barnette84895392015-04-30 12:31:01 -0700516 This method recognizes the following cases:
517 * If the Host is not running Chrome OS, do nothing.
518 * If a previously triggered update is now complete, reboot
519 to the new version.
520 * If the host is processing a previously triggered update,
521 do nothing.
522 * If the host is running a version of Chrome OS different
523 from the default for servo Hosts, trigger an update, but
524 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800525
Richard Barnette3a7697f2016-04-20 11:33:27 -0700526 @param wait_for_update If an update needs to be applied and
527 this is true, then don't return until the update is
528 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800529 @raises dev_server.DevServerException: If all the devservers are down.
530 @raises site_utils.ParseBuildNameException: If the devserver returns
531 an invalid build name.
beeps5e8c45a2013-12-17 22:05:11 -0800532 @raises AutoservRunError: If the update_engine_client isn't present on
533 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700534
beeps5e8c45a2013-12-17 22:05:11 -0800535 """
Dan Shib795b5a2015-09-24 13:26:35 -0700536 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700537 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800538 logging.info('Not attempting an update, either %s is not running '
539 'chromeos or we cannot find enough information about '
540 'the host.', self.hostname)
541 return
542
Dan Shib795b5a2015-09-24 13:26:35 -0700543 if lsbrelease_utils.is_moblab():
544 logging.info('Not attempting an update, %s is running moblab.',
545 self.hostname)
546 return
547
Richard Barnette383ef9c2016-12-13 11:56:49 -0800548 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
Prathmesh Prabhucbd5ebb2018-08-28 17:04:50 -0700549 target_build_number = server_utils.ParseBuildName(
J. Richard Barnette84895392015-04-30 12:31:01 -0700550 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800551 # For servo image staging, we want it as more widely distributed as
552 # possible, so that devservers' load can be evenly distributed. So use
553 # hostname instead of target_build as hash.
554 ds = dev_server.ImageServer.resolve(self.hostname,
555 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700556 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800557
558 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700559 status, current_build_number = self._check_for_reboot(updater)
560 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800561 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
562 logging.info('servo host %s already processing an update, update '
563 'engine client status=%s', self.hostname, status)
Allen Li66aa2542017-06-26 15:26:27 -0700564 elif status == autoupdater.UPDATER_NEED_REBOOT:
565 return
J. Richard Barnette84895392015-04-30 12:31:01 -0700566 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800567 logging.info('Using devserver url: %s to trigger update on '
568 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700569 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800570 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700571 ds.stage_artifacts(target_build,
572 artifacts=['full_payload'])
573 except Exception as e:
574 logging.error('Staging artifacts failed: %s', str(e))
575 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800576 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700577 try:
578 updater.trigger_update()
579 except autoupdater.RootFSUpdateError as e:
580 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800581 metrics.Counter('chromeos/autotest/servo/'
582 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700583 else:
584 trigger_download_status = 'passed'
585 logging.info('Triggered download and update %s for %s, '
586 'update engine currently in status %s',
587 trigger_download_status, self.hostname,
588 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800589 else:
590 logging.info('servo host %s does not require an update.',
591 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700592 update_pending = False
593
594 if update_pending and wait_for_update:
595 logging.info('Waiting for servo update to complete.')
596 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800597
598
Richard Barnette1edbb162016-11-01 11:47:50 -0700599 def verify(self, silent=False):
600 """Update the servo host and verify it's in a good state.
601
602 @param silent If true, suppress logging in `status.log`.
603 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700604 message = 'Beginning verify for servo host %s port %s serial %s'
605 message %= (self.hostname, self.servo_port, self.servo_serial)
606 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700607 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700608 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700609 except:
610 self.disconnect_servo()
611 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700612
613
Richard Barnette1edbb162016-11-01 11:47:50 -0700614 def repair(self, silent=False):
615 """Attempt to repair servo host.
616
617 @param silent If true, suppress logging in `status.log`.
618 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700619 message = 'Beginning repair for servo host %s port %s serial %s'
620 message %= (self.hostname, self.servo_port, self.servo_serial)
621 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700622 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700623 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700624 except:
625 self.disconnect_servo()
626 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700627
628
Fang Dengd4fe7392013-09-20 12:18:21 -0700629 def has_power(self):
630 """Return whether or not the servo host is powered by PoE."""
631 # TODO(fdeng): See crbug.com/302791
632 # For now, assume all servo hosts in the lab have power.
633 return self.is_in_lab()
634
635
def power_cycle(self):
    """Power cycle this host through the RPM client if it has power.

    Hosts for which `has_power()` is false are skipped with an
    informational log message.

    @raises AutoservRepairError if the power cycle request fails.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Convert transport and RPM failures into a repair error.
        failure = 'Power cycling %s failed: %s' % (self.hostname, e)
        raise hosts.AutoservRepairError(failure)
653
654
def get_servo(self):
    """Return the servo.Servo object cached on this host.

    @return: the value stored in `self._servo`.
    """
    cached_servo = self._servo
    return cached_servo
661
662
def close(self):
    """Stop UART logging and close the host object.

    When a servo object is attached, its UART streams are dumped into
    the job's result directory (only if a job is present) and the
    servo is closed.  The parent class `close()` always runs last.
    """
    servo = self._servo
    if servo:
        # In some cases when we run as lab-tools, the job object is
        # None, so there is no result directory to dump into.
        if self.job:
            servo.dump_uart_streams(self.job.resultdir)
        servo.close()

    super(ServoHost, self).close()
672
673
def make_servo_hostname(dut_hostname):
    """Derive a servo hostname from a DUT hostname.

    The first dot-separated label of the name gets a '-servo' suffix;
    any remaining domain labels are kept as they are.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    first_label, dot, domain = dut_hostname.partition('.')
    return first_label + '-servo' + dot + domain
685
686
def servo_host_is_up(servo_hostname):
    """Report whether a servo host answers ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False
            otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    outcome = ping_runner.PingRunner().ping(config)
    return outcome.received > 0
704
705
Richard Barnettee519dcd2016-08-15 17:37:17 -0700706def _map_afe_board_to_servo_board(afe_board):
707 """Map a board we get from the AFE to a servo appropriate value.
708
709 Many boards are identical to other boards for servo's purposes.
710 This function makes that mapping.
711
712 @param afe_board string board name received from AFE.
713 @return board we expect servo to have.
714
715 """
716 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
717 BOARD_MAP = {'gizmo': 'panther'}
718 mapped_board = afe_board
719 if afe_board in BOARD_MAP:
720 mapped_board = BOARD_MAP[afe_board]
721 else:
722 for suffix in KNOWN_SUFFIXES:
723 if afe_board.endswith(suffix):
724 mapped_board = afe_board[0:-len(suffix)]
725 break
726 if mapped_board != afe_board:
727 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
728 return mapped_board
729
730
def get_servo_args_for_host(dut_host):
    """Collect the servo arguments stored in a DUT's host info.

    Attributes whose keys appear in `SERVO_ATTR_KEYS` are copied out of
    the host info.  A port attribute is converted to an int; if that
    conversion fails, the host attribute is discarded so the result
    becomes `None`.  When the host info names a board, the servo board
    is added after mapping it to the servo-appropriate name.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return `servo_args` dict with host and an optional port, or `None`
            when no usable servo host attribute is present.
    """
    info = dut_host.host_info_store.get()
    servo_args = {}
    for key, value in info.attributes.iteritems():
        if key in SERVO_ATTR_KEYS:
            servo_args[key] = value

    if SERVO_PORT_ATTR in servo_args:
        raw_port = servo_args[SERVO_PORT_ATTR]
        try:
            servo_args[SERVO_PORT_ATTR] = int(raw_port)
        except ValueError:
            logging.error('servo port is not an int: %s', raw_port)
            # Drop the host so that an invalid port is never used; the
            # missing host makes this function return None below.
            servo_args.pop(SERVO_HOST_ATTR, None)

    if info.board:
        servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                info.board)

    if SERVO_HOST_ATTR not in servo_args:
        return None
    return servo_args
Richard Barnetteea3e4602016-06-10 12:36:41 -0700754
755
def _tweak_args_for_ssp_moblab(servo_args):
    """Point a loopback servo host at the configured container host IP.

    If the servo host in `servo_args` is 'localhost' or '127.0.0.1', it
    is replaced in place with the `host_container_ip` value from the
    `SSP` config section (`None` when that value is not configured).

    @param servo_args   Dictionary of servo arguments; modified in
                        place.
    """
    loopback_names = ('localhost', '127.0.0.1')
    if servo_args[SERVO_HOST_ATTR] not in loopback_names:
        return
    container_ip = _CONFIG.get_config_value(
            'SSP', 'host_container_ip', type=str, default=None)
    servo_args[SERVO_HOST_ATTR] = container_ip
760
761
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and then verify or repair a
    `ServoHost` for the servo attached to `dut`.  What happens depends
    on the parameters:
      * When `servo_args` is not `None`, the caller has a servo
        dependency: the servo host is created and checked with a
        silent `repair()`, and any failure propagates to the caller.
        Repair is kept out of `status.log` in this case so as to avoid
        polluting test results.
      * Otherwise, when `try_lab_servo` is true and servo arguments can
        be found for the DUT:
          * If `try_servo_repair` is true, the servo host is created
            and checked with `repair()`.
          * Otherwise, the servo host is created only if it responds
            to `ping`, and it is checked with `verify()`.
        In both of these cases a failure is logged and discarded, and
        the servo host is still returned.

    Servo arguments found on the DUT itself take precedence: when they
    are available they replace the `servo_args` passed by the caller.
    In a moblab SSP environment, a loopback servo host name in those
    arguments is redirected first.

    TODO(jrbarnette):  The special handling for servo in test control
    files should eventually be removed.

    @param dut                An instance of `Host` from which to take
                              servo parameters (if available).
    @param servo_args         A dictionary with servo parameters to use
                              if they can't be found from `dut`.  If
                              this argument is supplied, exceptions
                              from `repair()` will be passed back to
                              the caller.
    @param try_lab_servo      If not true, servo host creation will be
                              skipped unless otherwise required by the
                              caller.
    @param try_servo_repair   If true, check a servo host with
                              `repair()` instead of `verify()`.

    @returns: A ServoHost object or None.  See comments above.

    """
    servo_dependency = servo_args is not None

    if dut is not None and (try_lab_servo or servo_dependency):
        host_servo_args = get_servo_args_for_host(dut)
        if host_servo_args is not None:
            if utils.in_moblab_ssp():
                _tweak_args_for_ssp_moblab(host_servo_args)
            logging.debug(
                    'Overriding provided servo_args (%s) with arguments'
                    ' determined from the host (%s)',
                    servo_args,
                    host_servo_args,
            )
            servo_args = host_servo_args

    if servo_args is None:
        logging.debug('No servo_args provided, and failed to find overrides.')
        return None
    if SERVO_HOST_ATTR not in servo_args:
        logging.debug('%s attribute missing from servo_args: %s',
                      SERVO_HOST_ATTR, servo_args)
        return None

    servo_hostname = servo_args[SERVO_HOST_ATTR]
    # The ping pre-check only applies on the verify-only path; a servo
    # dependency or a repair request proceeds regardless.
    if not (servo_dependency or try_servo_repair):
        if not servo_host_is_up(servo_hostname):
            logging.debug('ServoHost is not up.')
            return None

    # servo_args is known to contain the host key here, so it is always
    # truthy and `is_in_lab` is simply the lab lookup result.
    newhost = ServoHost(
            is_in_lab=server_utils.host_in_lab(servo_hostname),
            **servo_args
    )

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        newhost.repair(silent=True)
        return newhost

    if try_servo_repair:
        check = newhost.repair
        failure_format = 'servo repair failed for %s'
    else:
        check = newhost.verify
        failure_format = 'servo verify failed for %s'
    try:
        check()
    except Exception:
        # Best effort: log the failure and still hand back the host.
        logging.exception(failure_format, newhost.hostname)
    return newhost