blob: 580d6aced8208fde82d3df9897984a7788ca85b3 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
Fang Deng5d518f42013-08-02 14:04:32 -070015import xmlrpclib
16
17from autotest_lib.client.bin import utils
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070018from autotest_lib.client.common_lib import control_data
Fang Deng5d518f42013-08-02 14:04:32 -070019from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070021from autotest_lib.client.common_lib import host_states
Richard Barnette9a26ad62016-06-10 12:03:08 -070022from autotest_lib.client.common_lib import hosts
Dan Shi0942b1d2015-03-31 11:07:00 -070023from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080024from autotest_lib.client.common_lib.cros import autoupdater
25from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070026from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070027from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000028from autotest_lib.client.cros import constants as client_constants
Richard Barnettee519dcd2016-08-15 17:37:17 -070029from autotest_lib.server import afe_utils
beeps5e8c45a2013-12-17 22:05:11 -080030from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080031from autotest_lib.server.cros import dnsname_mangler
Simran Basi0739d682015-02-25 16:22:56 -080032from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070033from autotest_lib.server.cros.dynamic_suite import control_file_getter
Richard Barnette9a26ad62016-06-10 12:03:08 -070034from autotest_lib.server.cros.servo import servo
35from autotest_lib.server.hosts import servo_repair
Fang Deng5d518f42013-08-02 14:04:32 -070036from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070037from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070038
Dan Shi5e2efb72017-02-07 11:40:23 -080039try:
40 from chromite.lib import metrics
41except ImportError:
42 metrics = utils.metrics_mock
43
Fang Deng5d518f42013-08-02 14:04:32 -070044
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT, along with
# the attribute names used for the servo's board and serial number.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
# When true (and the servo host is not localhost), the servod proxy is
# created through rpc_server_tracker instead of a direct http connection.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory handed to the control file getter when scheduling the servo
# host reboot test.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the tests that are scheduled to reboot a servo host; the second
# one is used when a forced reboot is requested.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
Fang Deng5d518f42013-08-02 14:04:32 -070062
Fang Deng5d518f42013-08-02 14:04:32 -070063class ServoHost(ssh_host.SSHHost):
64 """Host class for a host that controls a servo, e.g. beaglebone."""
65
Richard Barnette9a26ad62016-06-10 12:03:08 -070066 DEFAULT_PORT = 9999
67
Dan Shie5b3c512014-08-21 12:12:09 -070068 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070069 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070070
xixuan6cf6d2f2016-01-29 15:29:00 -080071 # Ready test function
72 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070073
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070074 REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
75
Fang Deng5d518f42013-08-02 14:04:32 -070076
Richard Barnette17bfc6c2016-08-04 18:41:43 -070077 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070078 servo_port=DEFAULT_PORT, servo_board=None,
Kevin Cheng643ce8a2016-09-15 15:42:12 -070079 servo_serial=None, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070080 """Initialize a ServoHost instance.
81
82 A ServoHost instance represents a host that controls a servo.
83
84 @param servo_host: Name of the host where the servod process
85 is running.
86 @param servo_port: Port the servod process is listening on.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070087 @param servo_board: Board that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -080088 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
89 to None, for which utils.host_is_in_lab_zone will be
90 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -070091
92 """
93 super(ServoHost, self)._initialize(hostname=servo_host,
94 *args, **dargs)
Richard Barnettee519dcd2016-08-15 17:37:17 -070095 self.servo_port = servo_port
96 self.servo_board = servo_board
Kevin Cheng643ce8a2016-09-15 15:42:12 -070097 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -070098 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -070099 self._repair_strategy = (
100 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700101 self._is_localhost = (self.hostname == 'localhost')
102 if self._is_localhost:
103 self._is_in_lab = False
104 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800105 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
106 else:
107 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800108
Richard Barnettee519dcd2016-08-15 17:37:17 -0700109 # Commands on the servo host must be run by the superuser.
110 # Our account on a remote host is root, but if our target is
111 # localhost then we might be running unprivileged. If so,
112 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700113 if self._is_localhost:
114 self._sudo_required = utils.system_output('id -u') != '0'
115 else:
116 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700117
Richard Barnette9a26ad62016-06-10 12:03:08 -0700118
119 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700120 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700121
122 Initializes `self._servo` and then verifies that all network
123 connections are working. This will create an ssh tunnel if
124 it's required.
125
126 As a side effect of testing the connection, all signals on the
127 target servo are reset to default values, and the USB stick is
128 set to the neutral (off) position.
129 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700130 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700131 timeout, _ = retry.timeout(
132 servo_obj.initialize_dut,
133 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
134 if timeout:
135 raise hosts.AutoservVerifyError(
136 'Servo initialize timed out.')
137 self._servo = servo_obj
138
139
140 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700141 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700142
143 If we've previously successfully connected to our servo,
144 disconnect any established ssh tunnel, and set `self._servo`
145 back to `None`.
146 """
147 if self._servo:
148 # N.B. This call is safe even without a tunnel:
149 # rpc_server_tracker.disconnect() silently ignores
150 # unknown ports.
151 self.rpc_server_tracker.disconnect(self.servo_port)
152 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700153
154
155 def is_in_lab(self):
156 """Check whether the servo host is a lab device.
157
158 @returns: True if the servo host is in Cros Lab, otherwise False.
159
160 """
161 return self._is_in_lab
162
163
164 def is_localhost(self):
165 """Checks whether the servo host points to localhost.
166
167 @returns: True if it points to localhost, otherwise False.
168
169 """
170 return self._is_localhost
171
172
173 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700174 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700175
176 @returns: An xmlrpclib.ServerProxy that is connected to the servod
177 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700178 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700179 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
180 return self.rpc_server_tracker.xmlrpc_connect(
181 None, self.servo_port,
182 ready_test_name=self.SERVO_READY_METHOD,
183 timeout_seconds=60)
184 else:
185 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
186 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700187
188
Richard Barnette9a26ad62016-06-10 12:03:08 -0700189 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800190 """Check if a servo host is running chromeos.
191
192 @return: True if the servo host is running chromeos.
193 False if it isn't, or we don't have enough information.
194 """
195 try:
196 result = self.run('grep -q CHROMEOS /etc/lsb-release',
197 ignore_status=True, timeout=10)
198 except (error.AutoservRunError, error.AutoservSSHTimeout):
199 return False
200 return result.exit_status == 0
201
202
Fang Deng5d518f42013-08-02 14:04:32 -0700203 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
Dean Liaoe3e75f62017-11-14 10:36:43 +0800204 connect_timeout=None, alive_interval=None,
205 alive_count_max=None, connection_attempts=None):
Fang Deng5d518f42013-08-02 14:04:32 -0700206 """Override default make_ssh_command to use tuned options.
207
208 Tuning changes:
209 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
210 connection failure. Consistency with remote_access.py.
211
212 - ServerAliveInterval=180; which causes SSH to ping connection every
213 180 seconds. In conjunction with ServerAliveCountMax ensures
214 that if the connection dies, Autotest will bail out quickly.
215
216 - ServerAliveCountMax=3; consistency with remote_access.py.
217
218 - ConnectAttempts=4; reduce flakiness in connection errors;
219 consistency with remote_access.py.
220
221 - UserKnownHostsFile=/dev/null; we don't care about the keys.
222
223 - SSH protocol forced to 2; needed for ServerAliveInterval.
224
225 @param user User name to use for the ssh connection.
226 @param port Port on the target host to use for ssh connection.
227 @param opts Additional options to the ssh command.
228 @param hosts_file Ignored.
229 @param connect_timeout Ignored.
230 @param alive_interval Ignored.
Dean Liaoe3e75f62017-11-14 10:36:43 +0800231 @param alive_count_max Ignored.
232 @param connection_attempts Ignored.
Fang Deng5d518f42013-08-02 14:04:32 -0700233
234 @returns: An ssh command with the requested settings.
235
236 """
Dean Liaoe3e75f62017-11-14 10:36:43 +0800237 options = ' '.join([opts, '-o Protocol=2'])
238 return super(ServoHost, self).make_ssh_command(
239 user=user, port=port, opts=options, hosts_file='/dev/null',
240 connect_timeout=30, alive_interval=180, alive_count_max=3,
241 connection_attempts=4)
Fang Deng5d518f42013-08-02 14:04:32 -0700242
243
244 def _make_scp_cmd(self, sources, dest):
245 """Format scp command.
246
247 Given a list of source paths and a destination path, produces the
248 appropriate scp command for encoding it. Remote paths must be
249 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
250 to allow additional ssh options.
251
252 @param sources: A list of source paths to copy from.
253 @param dest: Destination path to copy to.
254
255 @returns: An scp command that copies |sources| on local machine to
256 |dest| on the remote servo host.
257
258 """
259 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
260 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
261 return command % (self.master_ssh_option,
262 self.port, ' '.join(sources), dest)
263
264
265 def run(self, command, timeout=3600, ignore_status=False,
266 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800267 connect_timeout=30, ssh_failure_retry_ok=False,
268 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700269 """Run a command on the servo host.
270
271 Extends method `run` in SSHHost. If the servo host is a remote device,
272 it will call `run` in SSHost without changing anything.
273 If the servo host is 'localhost', it will call utils.system_output.
274
275 @param command: The command line string.
276 @param timeout: Time limit in seconds before attempting to
277 kill the running process. The run() function
278 will take a few seconds longer than 'timeout'
279 to complete if it has to kill the process.
280 @param ignore_status: Do not raise an exception, no matter
281 what the exit code of the command is.
282 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
283 @param connect_timeout: SSH connection timeout (in seconds)
284 Ignored if host is 'localhost'.
285 @param options: String with additional ssh command options
286 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800287 @param ssh_failure_retry_ok: when True and ssh connection failure is
288 suspected, OK to retry command (but not
289 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700290 @param stdin: Stdin to pass (a string) to the executed command.
291 @param verbose: Log the commands.
292 @param args: Sequence of strings to pass as arguments to command by
293 quoting them in " and escaping their contents if necessary.
294
295 @returns: A utils.CmdResult object.
296
297 @raises AutoservRunError if the command failed.
298 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
299 when servo host is not 'localhost'.
300
301 """
302 run_args = {'command': command, 'timeout': timeout,
303 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
304 'stderr_tee': stderr_tee, 'stdin': stdin,
305 'verbose': verbose, 'args': args}
306 if self.is_localhost():
307 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800308 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
309 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700310 try:
311 return utils.run(**run_args)
312 except error.CmdError as e:
313 logging.error(e)
314 raise error.AutoservRunError('command execution error',
315 e.result_obj)
316 else:
317 run_args['connect_timeout'] = connect_timeout
318 run_args['options'] = options
319 return super(ServoHost, self).run(**run_args)
320
321
Richard Barnette9a26ad62016-06-10 12:03:08 -0700322 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700323 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
324
325 @returns The version string in lsb-release, under attribute
326 CHROMEOS_RELEASE_VERSION.
327 """
328 lsb_release_content = self.run(
329 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
330 return lsbrelease_utils.get_chromeos_release_version(
331 lsb_release_content=lsb_release_content)
332
333
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700334 def get_attached_duts(self, afe):
335 """Gather a list of duts that use this servo host.
336
337 @param afe: afe instance.
338
339 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700340 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700341 return afe.get_hosts_by_attribute(
342 attribute=SERVO_HOST_ATTR, value=self.hostname)
343
344
345 def get_board(self):
346 """Determine the board for this servo host.
347
348 @returns a string representing this servo host's board.
349 """
350 return lsbrelease_utils.get_current_board(
351 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
352
353
354 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
355 """Choose which dut to schedule servo host reboot job.
356
357 We'll want a semi-deterministic way of selecting which host should be
358 scheduled for the servo host reboot job. For now we'll sort the
359 list with the expectation the dut list will stay consistent.
360 From there we'll grab the first dut that is available so we
361 don't schedule a job on a dut that will never run.
362
363 @param dut_list: List of the dut hostnames to choose from.
364 @param afe: Instance of the AFE.
365
366 @return hostname of dut to schedule job on.
367 """
368 afe_hosts = afe.get_hosts(dut_list)
369 afe_hosts.sort()
370 for afe_host in afe_hosts:
371 if afe_host.status not in host_states.UNAVAILABLE_STATES:
372 return afe_host.hostname
373 # If they're all unavailable, just return the first sorted dut.
374 dut_list.sort()
375 return dut_list[0]
376
377
378 def _sync_job_scheduled_for_duts(self, dut_list, afe):
379 """Checks if a synchronized reboot has been scheduled for these duts.
380
381 Grab all the host queue entries that aren't completed for the duts and
382 see if any of them have the expected job name.
383
384 @param dut_list: List of duts to check on.
385 @param afe: Instance of the AFE.
386
387 @returns True if the job is scheduled, False otherwise.
388 """
389 afe_hosts = afe.get_hosts(dut_list)
390 for afe_host in afe_hosts:
391 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
392 for hqe in hqes:
393 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700394 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
395 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700396 return True
397 return False
398
399
Kevin Cheng55265902016-10-19 12:46:50 -0700400 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700401 """Schedule a job to reboot the servo host.
402
403 When we schedule a job, it will create a ServoHost object which will
404 go through this entire flow of checking if a reboot is needed and
405 trying to schedule it. There is probably a better approach to setting
406 up a synchronized reboot but I'm coming up short on better ideas so I
407 apologize for this circus show.
408
Kevin Cheng55265902016-10-19 12:46:50 -0700409 @param dut_list: List of duts that need to be locked.
410 @param afe: Instance of afe.
411 @param force_reboot: Boolean to indicate if a forced reboot should be
412 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700413 """
414 # If we've already scheduled job on a dut, we're done here.
415 if self._sync_job_scheduled_for_duts(dut_list, afe):
416 return
417
418 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700419 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
420 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700421 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
422 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700423 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700424 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700425 try:
426 afe.create_job(control_file=control_file, name=test,
427 control_type=control_type, hosts=[dut])
428 except Exception as e:
429 # Sometimes creating the job will raise an exception. We'll log it
430 # but we don't want to fail because of it.
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700431 logging.exception('Scheduling reboot job failed due to Exception.')
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700432
433
434 def reboot(self, *args, **dargs):
435 """Reboot using special servo host reboot command."""
436 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
437 *args, **dargs)
438
439
440 def _check_for_reboot(self, updater):
441 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700442
443 If the host has successfully downloaded and finalized a new
444 build, reboot.
445
446 @param updater: a ChromiumOSUpdater instance for checking
447 whether reboot is needed.
448 @return Return a (status, build) tuple reflecting the
449 update_engine status and current build of the host
450 at the end of the call.
451 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700452 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700453 status = updater.check_update_status()
454 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700455 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700456 afe = frontend_wrappers.RetryingAFE(
457 timeout_min=5, delay_sec=10,
458 server=server_site_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700459 dut_list = self.get_attached_duts(afe)
460 logging.info('servo host has the following duts: %s', dut_list)
461 if len(dut_list) > 1:
462 logging.info('servo host has multiple duts, scheduling '
463 'synchronized reboot')
464 self.schedule_synchronized_reboot(dut_list, afe)
465 return status, current_build_number
466
467 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700468 self.hostname, current_build_number)
469 # Tell the reboot() call not to wait for completion.
470 # Otherwise, the call will log reboot failure if servo does
471 # not come back. The logged reboot failure will lead to
472 # test job failure. If the test does not require servo, we
473 # don't want servo failure to fail the test with error:
474 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700475 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700476
477 # We told the reboot() call not to wait, but we need to wait
478 # for the reboot before we continue. Alas. The code from
479 # here below is basically a copy of Host.wait_for_restart(),
480 # with the logging bits ripped out, so that they can't cause
481 # the failure logging problem described above.
482 #
483 # The black stain that this has left on my soul can never be
484 # erased.
485 old_boot_id = self.get_boot_id()
486 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
487 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
488 old_boot_id=old_boot_id):
489 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700490 'servo host %s failed to shut down.' %
491 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700492 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700493 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700494 status = updater.check_update_status()
495 logging.info('servo host %s back from reboot, with build %s',
496 self.hostname, current_build_number)
497 else:
498 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700499 'servo host %s failed to come back from reboot.' %
500 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700501 return status, current_build_number
502
503
Richard Barnette3a7697f2016-04-20 11:33:27 -0700504 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800505 """Update the image on the servo host, if needed.
506
J. Richard Barnette84895392015-04-30 12:31:01 -0700507 This method recognizes the following cases:
508 * If the Host is not running Chrome OS, do nothing.
509 * If a previously triggered update is now complete, reboot
510 to the new version.
511 * If the host is processing a previously triggered update,
512 do nothing.
513 * If the host is running a version of Chrome OS different
514 from the default for servo Hosts, trigger an update, but
515 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800516
Richard Barnette3a7697f2016-04-20 11:33:27 -0700517 @param wait_for_update If an update needs to be applied and
518 this is true, then don't return until the update is
519 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800520 @raises dev_server.DevServerException: If all the devservers are down.
521 @raises site_utils.ParseBuildNameException: If the devserver returns
522 an invalid build name.
523 @raises autoupdater.ChromiumOSError: If something goes wrong in the
524 checking update engine client status or applying an update.
525 @raises AutoservRunError: If the update_engine_client isn't present on
526 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700527
beeps5e8c45a2013-12-17 22:05:11 -0800528 """
Dan Shib795b5a2015-09-24 13:26:35 -0700529 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700530 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800531 logging.info('Not attempting an update, either %s is not running '
532 'chromeos or we cannot find enough information about '
533 'the host.', self.hostname)
534 return
535
Dan Shib795b5a2015-09-24 13:26:35 -0700536 if lsbrelease_utils.is_moblab():
537 logging.info('Not attempting an update, %s is running moblab.',
538 self.hostname)
539 return
540
Richard Barnette383ef9c2016-12-13 11:56:49 -0800541 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
J. Richard Barnette84895392015-04-30 12:31:01 -0700542 target_build_number = server_site_utils.ParseBuildName(
543 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800544 # For servo image staging, we want it as more widely distributed as
545 # possible, so that devservers' load can be evenly distributed. So use
546 # hostname instead of target_build as hash.
547 ds = dev_server.ImageServer.resolve(self.hostname,
548 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700549 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800550
551 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700552 status, current_build_number = self._check_for_reboot(updater)
553 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800554 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
555 logging.info('servo host %s already processing an update, update '
556 'engine client status=%s', self.hostname, status)
Allen Li66aa2542017-06-26 15:26:27 -0700557 elif status == autoupdater.UPDATER_NEED_REBOOT:
558 return
J. Richard Barnette84895392015-04-30 12:31:01 -0700559 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800560 logging.info('Using devserver url: %s to trigger update on '
561 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700562 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800563 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700564 ds.stage_artifacts(target_build,
565 artifacts=['full_payload'])
566 except Exception as e:
567 logging.error('Staging artifacts failed: %s', str(e))
568 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800569 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700570 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700571 # TODO(jrbarnette): This 'touch' is a gross hack
572 # to get us past crbug.com/613603. Once that
573 # bug is resolved, we should remove this code.
574 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700575 updater.trigger_update()
576 except autoupdater.RootFSUpdateError as e:
577 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800578 metrics.Counter('chromeos/autotest/servo/'
579 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700580 else:
581 trigger_download_status = 'passed'
582 logging.info('Triggered download and update %s for %s, '
583 'update engine currently in status %s',
584 trigger_download_status, self.hostname,
585 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800586 else:
587 logging.info('servo host %s does not require an update.',
588 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700589 update_pending = False
590
591 if update_pending and wait_for_update:
592 logging.info('Waiting for servo update to complete.')
593 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800594
595
Richard Barnette1edbb162016-11-01 11:47:50 -0700596 def verify(self, silent=False):
597 """Update the servo host and verify it's in a good state.
598
599 @param silent If true, suppress logging in `status.log`.
600 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700601 # TODO(jrbarnette) Old versions of beaglebone_servo include
Richard Barnette9a26ad62016-06-10 12:03:08 -0700602 # the powerd package. If you touch the .oobe_completed file
603 # (as we do to work around an update_engine problem), then
604 # powerd will eventually shut down the beaglebone for lack
605 # of (apparent) activity. Current versions of
Richard Barnette79d78c42016-05-25 09:31:21 -0700606 # beaglebone_servo don't have powerd, but until we can purge
607 # the lab of the old images, we need to make sure powerd
608 # isn't running.
609 self.run('stop powerd', ignore_status=True)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700610 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700611 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700612 except:
613 self.disconnect_servo()
614 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700615
616
Richard Barnette1edbb162016-11-01 11:47:50 -0700617 def repair(self, silent=False):
618 """Attempt to repair servo host.
619
620 @param silent If true, suppress logging in `status.log`.
621 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700622 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700623 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700624 except:
625 self.disconnect_servo()
626 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700627
628
Fang Dengd4fe7392013-09-20 12:18:21 -0700629 def has_power(self):
630 """Return whether or not the servo host is powered by PoE."""
631 # TODO(fdeng): See crbug.com/302791
632 # For now, assume all servo hosts in the lab have power.
633 return self.is_in_lab()
634
635
def power_cycle(self):
    """Power cycle this host through the RPM client, if it has power.

    Hosts for which `has_power()` is false are skipped with an
    informational log message.

    @raises AutoservRepairError if it fails to power cycle the
            servo host.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return
    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Report the known RPM failures as a single repair error.
        raise hosts.AutoservRepairError(
                'Power cycling %s failed: %s' % (self.hostname, e))
653
654
def get_servo(self):
    """Return the servo object cached on this host.

    @return: the servo.Servo object stored in `self._servo`.
    """
    cached_servo = self._servo
    return cached_servo
661
662
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated
    component of the name; any remaining components are unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # `dot` is empty when the name has no domain part, so nothing
    # extra gets appended in that case.
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
674
675
def servo_host_is_up(servo_hostname):
    """Check whether a servo host answers to ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False
            otherwise.
    """
    # Strictly speaking this repeats the SSH ping performed early in
    # the servo proxy initialization code.  The difference is that this
    # ping gives up within a couple of seconds when it fails, whereas
    # an SSH ping needs 60 seconds to decide it has timed out.  That
    # long timeout is hit when the servo DNS name resolves, but no
    # host exists at the resulting IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    result = ping_runner.PingRunner().ping(config)
    return result.received > 0
693
694
Richard Barnettee519dcd2016-08-15 17:37:17 -0700695def _map_afe_board_to_servo_board(afe_board):
696 """Map a board we get from the AFE to a servo appropriate value.
697
698 Many boards are identical to other boards for servo's purposes.
699 This function makes that mapping.
700
701 @param afe_board string board name received from AFE.
702 @return board we expect servo to have.
703
704 """
705 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
706 BOARD_MAP = {'gizmo': 'panther'}
707 mapped_board = afe_board
708 if afe_board in BOARD_MAP:
709 mapped_board = BOARD_MAP[afe_board]
710 else:
711 for suffix in KNOWN_SUFFIXES:
712 if afe_board.endswith(suffix):
713 mapped_board = afe_board[0:-len(suffix)]
714 break
715 if mapped_board != afe_board:
716 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
717 return mapped_board
718
719
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    When the attributes name no servo host, and we are neither on
    moblab nor looking at a DUT identified by IP address, the default
    lab servo hostname derived from the DUT hostname is used, provided
    it is in the lab zone.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an optional port,
            serial and board, plus an `is_in_lab` flag indicating
            whether this is in the CrOS test lab, or some different
            environment.  `servo_args` is `None` when no usable servo
            data was found, including when the port attribute is not
            an integer.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        # Inside an SSP container on moblab, a loopback servo host is
        # replaced by the configured `host_container_ip` value.
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except ValueError:
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # `servo_args` may just have been discarded because of a bad
        # port; storing the serial into `None` would raise TypeError.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
778
779
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Build a ServoHost for a DUT when one is wanted and available.

    A `ServoHost` for the servo attached to `dut` is created and then
    checked with either `verify()` or `repair()`.  Which of those
    happens, if anything, depends on the arguments:
      * A caller passing a non-`None` `servo_args` requires a servo:
        the host is always created and checked with `repair()`.
      * Otherwise, provided a lab servo exists and `try_lab_servo` is
        true:
          * with `try_servo_repair` true, the host is created and
            checked with `repair()`;
          * with `try_servo_repair` false, the host is created and
            checked with `verify()`, but only if the servo answers
            `ping`.

    When `servo_args` is not `None`, a repair failure propagates to
    the caller.  This only happens when we're called from a test (not
    special task) control file that has an explicit dependency on
    servo; in that case repair must not write to `status.log`, so
    that test results are not polluted.  In every other case the
    exception is logged and then dropped.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    A servo host is described by a host name, port number, and DUT
    board.  These come from the following sources, highest priority
    first:
      * Servo attributes found on `dut`, which override everything
        else.
      * A DNS entry for the servo in the CrOS lab network, derived
        from the DUT hostname; it is combined with the default port
        and the DUT's board.
      * The `servo_args` dict handed in by the caller, when nothing
        else was found.

    @param dut                An instance of `Host` from which to take
                              servo parameters (if available).
    @param servo_args         A dictionary with servo parameters to use
                              if they can't be found from `dut`.  If
                              this argument is supplied, unrepaired
                              exceptions from `verify()` will be passed
                              back to the caller.
    @param try_lab_servo      If not true, servo host creation will be
                              skipped unless otherwise required by the
                              caller.
    @param try_servo_repair   If true, check a servo host with
                              `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_dependency = servo_args is not None
    is_in_lab = False
    if dut is not None and (try_lab_servo or servo_dependency):
        dut_servo_args, is_in_lab = _get_standard_servo_args(dut)
        if dut_servo_args is not None:
            servo_args = dut_servo_args
    if servo_args is None:
        return None
    # The ping is attempted only for the plain verify() case; a servo
    # that doesn't answer is not worth creating a host for.
    if not (servo_dependency or try_servo_repair or
            servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None
    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    # repair() already covers everything verify() does, so exactly
    # one of the two is invoked below, never both.
    if servo_dependency:
        newhost.repair(silent=True)
        return newhost
    operation = 'repair' if try_servo_repair else 'verification'
    try:
        if try_servo_repair:
            newhost.repair()
        else:
            newhost.verify()
    except Exception:
        logging.exception('Servo %s failed for %s',
                          operation, newhost.hostname)
    return newhost