blob: c488551ba8be81088a7857718c016fe63c9b8293 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
Kevin Cheng79589982016-10-25 13:26:04 -070015import traceback
Fang Deng5d518f42013-08-02 14:04:32 -070016import xmlrpclib
17
18from autotest_lib.client.bin import utils
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070019from autotest_lib.client.common_lib import control_data
Fang Deng5d518f42013-08-02 14:04:32 -070020from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080021from autotest_lib.client.common_lib import global_config
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070022from autotest_lib.client.common_lib import host_states
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Dan Shi0942b1d2015-03-31 11:07:00 -070024from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080025from autotest_lib.client.common_lib.cros import autoupdater
26from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070027from autotest_lib.client.common_lib.cros import retry
Kevin Cheng79589982016-10-25 13:26:04 -070028from autotest_lib.client.common_lib.cros.graphite import autotest_es
Christopher Wileycef1f902014-06-19 11:11:23 -070029from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000030from autotest_lib.client.cros import constants as client_constants
Richard Barnettee519dcd2016-08-15 17:37:17 -070031from autotest_lib.server import afe_utils
beeps5e8c45a2013-12-17 22:05:11 -080032from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080033from autotest_lib.server.cros import dnsname_mangler
Simran Basi0739d682015-02-25 16:22:56 -080034from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070035from autotest_lib.server.cros.dynamic_suite import control_file_getter
Richard Barnette9a26ad62016-06-10 12:03:08 -070036from autotest_lib.server.cros.servo import servo
37from autotest_lib.server.hosts import servo_repair
Fang Deng5d518f42013-08-02 14:04:32 -070038from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070039from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070040
Dan Shi5e2efb72017-02-07 11:40:23 -080041try:
42 from chromite.lib import metrics
43except ImportError:
44 metrics = utils.metrics_mock
45
Fang Deng5d518f42013-08-02 14:04:32 -070046
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config

# When true, servod on a remote servo host is reached through an ssh tunnel
# (see ServoHost.get_servod_server_proxy()).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory searched for the servo host reboot control files.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs scheduled to reboot a servo host shared by several DUTs.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
Fang Deng5d518f42013-08-02 14:04:32 -070064
Fang Deng5d518f42013-08-02 14:04:32 -070065class ServoHost(ssh_host.SSHHost):
66 """Host class for a host that controls a servo, e.g. beaglebone."""
67
Richard Barnette9a26ad62016-06-10 12:03:08 -070068 DEFAULT_PORT = 9999
69
Dan Shie5b3c512014-08-21 12:12:09 -070070 # Timeout for initializing servo signals.
71 INITIALIZE_SERVO_TIMEOUT_SECS = 30
Richard Barnette9a26ad62016-06-10 12:03:08 -070072
xixuan6cf6d2f2016-01-29 15:29:00 -080073 # Ready test function
74 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070075
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070076 REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
77
Fang Deng5d518f42013-08-02 14:04:32 -070078
Richard Barnette17bfc6c2016-08-04 18:41:43 -070079 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -070080 servo_port=DEFAULT_PORT, servo_board=None,
Kevin Cheng643ce8a2016-09-15 15:42:12 -070081 servo_serial=None, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070082 """Initialize a ServoHost instance.
83
84 A ServoHost instance represents a host that controls a servo.
85
86 @param servo_host: Name of the host where the servod process
87 is running.
88 @param servo_port: Port the servod process is listening on.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070089 @param servo_board: Board that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -080090 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
91 to None, for which utils.host_is_in_lab_zone will be
92 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -070093
94 """
95 super(ServoHost, self)._initialize(hostname=servo_host,
96 *args, **dargs)
Richard Barnettee519dcd2016-08-15 17:37:17 -070097 self.servo_port = servo_port
98 self.servo_board = servo_board
Kevin Cheng643ce8a2016-09-15 15:42:12 -070099 self.servo_serial = servo_serial
Richard Barnettee519dcd2016-08-15 17:37:17 -0700100 self._servo = None
Richard Barnette9a26ad62016-06-10 12:03:08 -0700101 self._repair_strategy = (
102 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700103 self._is_localhost = (self.hostname == 'localhost')
104 if self._is_localhost:
105 self._is_in_lab = False
106 elif is_in_lab is None:
Dan Shi4d478522014-02-14 13:46:32 -0800107 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
108 else:
109 self._is_in_lab = is_in_lab
xixuan6cf6d2f2016-01-29 15:29:00 -0800110
Richard Barnettee519dcd2016-08-15 17:37:17 -0700111 # Commands on the servo host must be run by the superuser.
112 # Our account on a remote host is root, but if our target is
113 # localhost then we might be running unprivileged. If so,
114 # `sudo` will have to be added to the commands.
Fang Deng5d518f42013-08-02 14:04:32 -0700115 if self._is_localhost:
116 self._sudo_required = utils.system_output('id -u') != '0'
117 else:
118 self._sudo_required = False
Richard Barnettee519dcd2016-08-15 17:37:17 -0700119
Richard Barnette9a26ad62016-06-10 12:03:08 -0700120
121 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700122 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700123
124 Initializes `self._servo` and then verifies that all network
125 connections are working. This will create an ssh tunnel if
126 it's required.
127
128 As a side effect of testing the connection, all signals on the
129 target servo are reset to default values, and the USB stick is
130 set to the neutral (off) position.
131 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700132 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700133 timeout, _ = retry.timeout(
134 servo_obj.initialize_dut,
135 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
136 if timeout:
137 raise hosts.AutoservVerifyError(
138 'Servo initialize timed out.')
139 self._servo = servo_obj
140
141
142 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700143 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700144
145 If we've previously successfully connected to our servo,
146 disconnect any established ssh tunnel, and set `self._servo`
147 back to `None`.
148 """
149 if self._servo:
150 # N.B. This call is safe even without a tunnel:
151 # rpc_server_tracker.disconnect() silently ignores
152 # unknown ports.
153 self.rpc_server_tracker.disconnect(self.servo_port)
154 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700155
156
157 def is_in_lab(self):
158 """Check whether the servo host is a lab device.
159
160 @returns: True if the servo host is in Cros Lab, otherwise False.
161
162 """
163 return self._is_in_lab
164
165
166 def is_localhost(self):
167 """Checks whether the servo host points to localhost.
168
169 @returns: True if it points to localhost, otherwise False.
170
171 """
172 return self._is_localhost
173
174
175 def get_servod_server_proxy(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700176 """Return a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700177
178 @returns: An xmlrpclib.ServerProxy that is connected to the servod
179 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700180 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700181 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
182 return self.rpc_server_tracker.xmlrpc_connect(
183 None, self.servo_port,
184 ready_test_name=self.SERVO_READY_METHOD,
185 timeout_seconds=60)
186 else:
187 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
188 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700189
190
Richard Barnette9a26ad62016-06-10 12:03:08 -0700191 def is_cros_host(self):
beeps5e8c45a2013-12-17 22:05:11 -0800192 """Check if a servo host is running chromeos.
193
194 @return: True if the servo host is running chromeos.
195 False if it isn't, or we don't have enough information.
196 """
197 try:
198 result = self.run('grep -q CHROMEOS /etc/lsb-release',
199 ignore_status=True, timeout=10)
200 except (error.AutoservRunError, error.AutoservSSHTimeout):
201 return False
202 return result.exit_status == 0
203
204
Fang Deng5d518f42013-08-02 14:04:32 -0700205 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
206 connect_timeout=None, alive_interval=None):
207 """Override default make_ssh_command to use tuned options.
208
209 Tuning changes:
210 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
211 connection failure. Consistency with remote_access.py.
212
213 - ServerAliveInterval=180; which causes SSH to ping connection every
214 180 seconds. In conjunction with ServerAliveCountMax ensures
215 that if the connection dies, Autotest will bail out quickly.
216
217 - ServerAliveCountMax=3; consistency with remote_access.py.
218
219 - ConnectAttempts=4; reduce flakiness in connection errors;
220 consistency with remote_access.py.
221
222 - UserKnownHostsFile=/dev/null; we don't care about the keys.
223
224 - SSH protocol forced to 2; needed for ServerAliveInterval.
225
226 @param user User name to use for the ssh connection.
227 @param port Port on the target host to use for ssh connection.
228 @param opts Additional options to the ssh command.
229 @param hosts_file Ignored.
230 @param connect_timeout Ignored.
231 @param alive_interval Ignored.
232
233 @returns: An ssh command with the requested settings.
234
235 """
236 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
237 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
238 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
239 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
240 ' -o Protocol=2 -l %s -p %d')
241 return base_command % (opts, user, port)
242
243
244 def _make_scp_cmd(self, sources, dest):
245 """Format scp command.
246
247 Given a list of source paths and a destination path, produces the
248 appropriate scp command for encoding it. Remote paths must be
249 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
250 to allow additional ssh options.
251
252 @param sources: A list of source paths to copy from.
253 @param dest: Destination path to copy to.
254
255 @returns: An scp command that copies |sources| on local machine to
256 |dest| on the remote servo host.
257
258 """
259 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
260 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
261 return command % (self.master_ssh_option,
262 self.port, ' '.join(sources), dest)
263
264
265 def run(self, command, timeout=3600, ignore_status=False,
266 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800267 connect_timeout=30, ssh_failure_retry_ok=False,
268 options='', stdin=None, verbose=True, args=()):
Fang Deng5d518f42013-08-02 14:04:32 -0700269 """Run a command on the servo host.
270
271 Extends method `run` in SSHHost. If the servo host is a remote device,
272 it will call `run` in SSHost without changing anything.
273 If the servo host is 'localhost', it will call utils.system_output.
274
275 @param command: The command line string.
276 @param timeout: Time limit in seconds before attempting to
277 kill the running process. The run() function
278 will take a few seconds longer than 'timeout'
279 to complete if it has to kill the process.
280 @param ignore_status: Do not raise an exception, no matter
281 what the exit code of the command is.
282 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
283 @param connect_timeout: SSH connection timeout (in seconds)
284 Ignored if host is 'localhost'.
285 @param options: String with additional ssh command options
286 Ignored if host is 'localhost'.
Luigi Semenzatobfbd1f32017-01-06 10:41:18 -0800287 @param ssh_failure_retry_ok: when True and ssh connection failure is
288 suspected, OK to retry command (but not
289 compulsory, and likely not needed here)
Fang Deng5d518f42013-08-02 14:04:32 -0700290 @param stdin: Stdin to pass (a string) to the executed command.
291 @param verbose: Log the commands.
292 @param args: Sequence of strings to pass as arguments to command by
293 quoting them in " and escaping their contents if necessary.
294
295 @returns: A utils.CmdResult object.
296
297 @raises AutoservRunError if the command failed.
298 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
299 when servo host is not 'localhost'.
300
301 """
302 run_args = {'command': command, 'timeout': timeout,
303 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
304 'stderr_tee': stderr_tee, 'stdin': stdin,
305 'verbose': verbose, 'args': args}
306 if self.is_localhost():
307 if self._sudo_required:
Michael Tangf9b3ada2016-11-18 16:01:05 -0800308 run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
309 command)
Fang Deng5d518f42013-08-02 14:04:32 -0700310 try:
311 return utils.run(**run_args)
312 except error.CmdError as e:
313 logging.error(e)
314 raise error.AutoservRunError('command execution error',
315 e.result_obj)
316 else:
317 run_args['connect_timeout'] = connect_timeout
318 run_args['options'] = options
319 return super(ServoHost, self).run(**run_args)
320
321
Richard Barnette9a26ad62016-06-10 12:03:08 -0700322 def _get_release_version(self):
Dan Shi0942b1d2015-03-31 11:07:00 -0700323 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
324
325 @returns The version string in lsb-release, under attribute
326 CHROMEOS_RELEASE_VERSION.
327 """
328 lsb_release_content = self.run(
329 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
330 return lsbrelease_utils.get_chromeos_release_version(
331 lsb_release_content=lsb_release_content)
332
333
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700334 def get_attached_duts(self, afe):
335 """Gather a list of duts that use this servo host.
336
337 @param afe: afe instance.
338
339 @returns list of duts.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700340 """
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700341 return afe.get_hosts_by_attribute(
342 attribute=SERVO_HOST_ATTR, value=self.hostname)
343
344
345 def get_board(self):
346 """Determine the board for this servo host.
347
348 @returns a string representing this servo host's board.
349 """
350 return lsbrelease_utils.get_current_board(
351 lsb_release_content=self.run('cat /etc/lsb-release').stdout)
352
353
354 def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
355 """Choose which dut to schedule servo host reboot job.
356
357 We'll want a semi-deterministic way of selecting which host should be
358 scheduled for the servo host reboot job. For now we'll sort the
359 list with the expectation the dut list will stay consistent.
360 From there we'll grab the first dut that is available so we
361 don't schedule a job on a dut that will never run.
362
363 @param dut_list: List of the dut hostnames to choose from.
364 @param afe: Instance of the AFE.
365
366 @return hostname of dut to schedule job on.
367 """
368 afe_hosts = afe.get_hosts(dut_list)
369 afe_hosts.sort()
370 for afe_host in afe_hosts:
371 if afe_host.status not in host_states.UNAVAILABLE_STATES:
372 return afe_host.hostname
373 # If they're all unavailable, just return the first sorted dut.
374 dut_list.sort()
375 return dut_list[0]
376
377
378 def _sync_job_scheduled_for_duts(self, dut_list, afe):
379 """Checks if a synchronized reboot has been scheduled for these duts.
380
381 Grab all the host queue entries that aren't completed for the duts and
382 see if any of them have the expected job name.
383
384 @param dut_list: List of duts to check on.
385 @param afe: Instance of the AFE.
386
387 @returns True if the job is scheduled, False otherwise.
388 """
389 afe_hosts = afe.get_hosts(dut_list)
390 for afe_host in afe_hosts:
391 hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
392 for hqe in hqes:
393 job = afe.get_jobs(id=hqe.job.id)
Kevin Cheng55265902016-10-19 12:46:50 -0700394 if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
395 _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700396 return True
397 return False
398
399
Kevin Cheng55265902016-10-19 12:46:50 -0700400 def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700401 """Schedule a job to reboot the servo host.
402
403 When we schedule a job, it will create a ServoHost object which will
404 go through this entire flow of checking if a reboot is needed and
405 trying to schedule it. There is probably a better approach to setting
406 up a synchronized reboot but I'm coming up short on better ideas so I
407 apologize for this circus show.
408
Kevin Cheng55265902016-10-19 12:46:50 -0700409 @param dut_list: List of duts that need to be locked.
410 @param afe: Instance of afe.
411 @param force_reboot: Boolean to indicate if a forced reboot should be
412 scheduled or not.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700413 """
414 # If we've already scheduled job on a dut, we're done here.
415 if self._sync_job_scheduled_for_duts(dut_list, afe):
416 return
417
418 # Looks like we haven't scheduled a job yet.
Kevin Cheng55265902016-10-19 12:46:50 -0700419 test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
420 else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700421 dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
422 getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
Kevin Cheng55265902016-10-19 12:46:50 -0700423 control_file = getter.get_control_file_contents_by_name(test)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700424 control_type = control_data.CONTROL_TYPE_NAMES.SERVER
Kevin Cheng79589982016-10-25 13:26:04 -0700425 try:
426 afe.create_job(control_file=control_file, name=test,
427 control_type=control_type, hosts=[dut])
428 except Exception as e:
429 # Sometimes creating the job will raise an exception. We'll log it
430 # but we don't want to fail because of it.
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700431 logging.exception('Scheduling reboot job failed due to Exception.')
Kevin Cheng79589982016-10-25 13:26:04 -0700432 metadata = {'dut': dut,
433 'servo_host': self.hostname,
434 'error': str(e),
435 'details': traceback.format_exc()}
436 # We want to track how often we fail here so we can justify
437 # investing some effort into hardening up afe.create_job().
438 autotest_es.post(use_http=True,
439 type_str='servohost_Reboot_schedule_fail',
440 metadata=metadata)
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700441
442
443 def reboot(self, *args, **dargs):
444 """Reboot using special servo host reboot command."""
445 super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
446 *args, **dargs)
447
448
449 def _check_for_reboot(self, updater):
450 """Reboot this servo host if an upgrade is waiting.
Richard Barnette3a7697f2016-04-20 11:33:27 -0700451
452 If the host has successfully downloaded and finalized a new
453 build, reboot.
454
455 @param updater: a ChromiumOSUpdater instance for checking
456 whether reboot is needed.
457 @return Return a (status, build) tuple reflecting the
458 update_engine status and current build of the host
459 at the end of the call.
460 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700461 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700462 status = updater.check_update_status()
463 if status == autoupdater.UPDATER_NEED_REBOOT:
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700464 # Check if we need to schedule an organized reboot.
Kevin Cheng79589982016-10-25 13:26:04 -0700465 afe = frontend_wrappers.RetryingAFE(
466 timeout_min=5, delay_sec=10,
467 server=server_site_utils.get_global_afe_hostname())
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700468 dut_list = self.get_attached_duts(afe)
469 logging.info('servo host has the following duts: %s', dut_list)
470 if len(dut_list) > 1:
471 logging.info('servo host has multiple duts, scheduling '
472 'synchronized reboot')
473 self.schedule_synchronized_reboot(dut_list, afe)
474 return status, current_build_number
475
476 logging.info('Rebooting servo host %s from build %s',
Richard Barnette3a7697f2016-04-20 11:33:27 -0700477 self.hostname, current_build_number)
478 # Tell the reboot() call not to wait for completion.
479 # Otherwise, the call will log reboot failure if servo does
480 # not come back. The logged reboot failure will lead to
481 # test job failure. If the test does not require servo, we
482 # don't want servo failure to fail the test with error:
483 # `Host did not return from reboot` in status.log.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700484 self.reboot(fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700485
486 # We told the reboot() call not to wait, but we need to wait
487 # for the reboot before we continue. Alas. The code from
488 # here below is basically a copy of Host.wait_for_restart(),
489 # with the logging bits ripped out, so that they can't cause
490 # the failure logging problem described above.
491 #
492 # The black stain that this has left on my soul can never be
493 # erased.
494 old_boot_id = self.get_boot_id()
495 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
496 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
497 old_boot_id=old_boot_id):
498 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700499 'servo host %s failed to shut down.' %
500 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700501 if self.wait_up(timeout=120):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700502 current_build_number = self._get_release_version()
Richard Barnette3a7697f2016-04-20 11:33:27 -0700503 status = updater.check_update_status()
504 logging.info('servo host %s back from reboot, with build %s',
505 self.hostname, current_build_number)
506 else:
507 raise error.AutoservHostError(
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700508 'servo host %s failed to come back from reboot.' %
509 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700510 return status, current_build_number
511
512
Richard Barnette3a7697f2016-04-20 11:33:27 -0700513 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800514 """Update the image on the servo host, if needed.
515
J. Richard Barnette84895392015-04-30 12:31:01 -0700516 This method recognizes the following cases:
517 * If the Host is not running Chrome OS, do nothing.
518 * If a previously triggered update is now complete, reboot
519 to the new version.
520 * If the host is processing a previously triggered update,
521 do nothing.
522 * If the host is running a version of Chrome OS different
523 from the default for servo Hosts, trigger an update, but
524 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800525
Richard Barnette3a7697f2016-04-20 11:33:27 -0700526 @param wait_for_update If an update needs to be applied and
527 this is true, then don't return until the update is
528 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800529 @raises dev_server.DevServerException: If all the devservers are down.
530 @raises site_utils.ParseBuildNameException: If the devserver returns
531 an invalid build name.
532 @raises autoupdater.ChromiumOSError: If something goes wrong in the
533 checking update engine client status or applying an update.
534 @raises AutoservRunError: If the update_engine_client isn't present on
535 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700536
beeps5e8c45a2013-12-17 22:05:11 -0800537 """
Dan Shib795b5a2015-09-24 13:26:35 -0700538 # servod could be running in a Ubuntu workstation.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700539 if not self.is_cros_host():
beeps5e8c45a2013-12-17 22:05:11 -0800540 logging.info('Not attempting an update, either %s is not running '
541 'chromeos or we cannot find enough information about '
542 'the host.', self.hostname)
543 return
544
Dan Shib795b5a2015-09-24 13:26:35 -0700545 if lsbrelease_utils.is_moblab():
546 logging.info('Not attempting an update, %s is running moblab.',
547 self.hostname)
548 return
549
Richard Barnette383ef9c2016-12-13 11:56:49 -0800550 target_build = afe_utils.get_stable_cros_image_name(self.get_board())
J. Richard Barnette84895392015-04-30 12:31:01 -0700551 target_build_number = server_site_utils.ParseBuildName(
552 target_build)[3]
xixuanfa2d92a2016-12-09 09:45:27 -0800553 # For servo image staging, we want it as more widely distributed as
554 # possible, so that devservers' load can be evenly distributed. So use
555 # hostname instead of target_build as hash.
556 ds = dev_server.ImageServer.resolve(self.hostname,
557 hostname=self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700558 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800559
560 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700561 status, current_build_number = self._check_for_reboot(updater)
562 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800563 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
564 logging.info('servo host %s already processing an update, update '
565 'engine client status=%s', self.hostname, status)
Allen Li66aa2542017-06-26 15:26:27 -0700566 elif status == autoupdater.UPDATER_NEED_REBOOT:
567 return
J. Richard Barnette84895392015-04-30 12:31:01 -0700568 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800569 logging.info('Using devserver url: %s to trigger update on '
570 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700571 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800572 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700573 ds.stage_artifacts(target_build,
574 artifacts=['full_payload'])
575 except Exception as e:
576 logging.error('Staging artifacts failed: %s', str(e))
577 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800578 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700579 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700580 # TODO(jrbarnette): This 'touch' is a gross hack
581 # to get us past crbug.com/613603. Once that
582 # bug is resolved, we should remove this code.
583 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700584 updater.trigger_update()
585 except autoupdater.RootFSUpdateError as e:
586 trigger_download_status = 'failed with %s' % str(e)
Aviv Keshet11836322016-11-22 11:32:01 -0800587 metrics.Counter('chromeos/autotest/servo/'
588 'rootfs_update_failed').increment()
J. Richard Barnette84895392015-04-30 12:31:01 -0700589 else:
590 trigger_download_status = 'passed'
591 logging.info('Triggered download and update %s for %s, '
592 'update engine currently in status %s',
593 trigger_download_status, self.hostname,
594 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800595 else:
596 logging.info('servo host %s does not require an update.',
597 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700598 update_pending = False
599
600 if update_pending and wait_for_update:
601 logging.info('Waiting for servo update to complete.')
602 self.run('update_engine_client --follow', ignore_status=True)
beeps5e8c45a2013-12-17 22:05:11 -0800603
604
Richard Barnette1edbb162016-11-01 11:47:50 -0700605 def verify(self, silent=False):
606 """Update the servo host and verify it's in a good state.
607
608 @param silent If true, suppress logging in `status.log`.
609 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700610 # TODO(jrbarnette) Old versions of beaglebone_servo include
Richard Barnette9a26ad62016-06-10 12:03:08 -0700611 # the powerd package. If you touch the .oobe_completed file
612 # (as we do to work around an update_engine problem), then
613 # powerd will eventually shut down the beaglebone for lack
614 # of (apparent) activity. Current versions of
Richard Barnette79d78c42016-05-25 09:31:21 -0700615 # beaglebone_servo don't have powerd, but until we can purge
616 # the lab of the old images, we need to make sure powerd
617 # isn't running.
618 self.run('stop powerd', ignore_status=True)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700619 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700620 self._repair_strategy.verify(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700621 except:
622 self.disconnect_servo()
623 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700624
625
Richard Barnette1edbb162016-11-01 11:47:50 -0700626 def repair(self, silent=False):
627 """Attempt to repair servo host.
628
629 @param silent If true, suppress logging in `status.log`.
630 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700631 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700632 self._repair_strategy.repair(self, silent)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700633 except:
634 self.disconnect_servo()
635 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700636
637
Fang Dengd4fe7392013-09-20 12:18:21 -0700638 def has_power(self):
639 """Return whether or not the servo host is powered by PoE."""
640 # TODO(fdeng): See crbug.com/302791
641 # For now, assume all servo hosts in the lab have power.
642 return self.is_in_lab()
643
644
def power_cycle(self):
    """Cycle power to this host via PoE if it is a lab device.

    When `has_power()` is false, nothing is done beyond logging
    that the power cycle was skipped.

    @raises AutoservRepairError if it fails to power cycle the
            servo host.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self.hostname, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as err:
        # Report any of the failures listed above as a repair
        # failure carrying the original error text.
        raise hosts.AutoservRepairError(
                'Power cycling %s failed: %s' % (self.hostname, err))
662
663
def get_servo(self):
    """Return the servo object cached on this host.

    @return: the value of `self._servo` (a servo.Servo object).
    """
    cached_servo = self._servo
    return cached_servo
670
671
def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo name is formed by appending `-servo` to the first
    dot-separated component of the DUT name; any remaining
    components are kept as they are.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
683
684
def servo_host_is_up(servo_hostname):
    """Given a servo host name, return if it's up or not.

    The host counts as up when at least one of three pings is
    answered.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    result = ping_runner.PingRunner().ping(config)
    return result.received > 0
702
703
Richard Barnettee519dcd2016-08-15 17:37:17 -0700704def _map_afe_board_to_servo_board(afe_board):
705 """Map a board we get from the AFE to a servo appropriate value.
706
707 Many boards are identical to other boards for servo's purposes.
708 This function makes that mapping.
709
710 @param afe_board string board name received from AFE.
711 @return board we expect servo to have.
712
713 """
714 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
715 BOARD_MAP = {'gizmo': 'panther'}
716 mapped_board = afe_board
717 if afe_board in BOARD_MAP:
718 mapped_board = BOARD_MAP[afe_board]
719 else:
720 for suffix in KNOWN_SUFFIXES:
721 if afe_board.endswith(suffix):
722 mapped_board = afe_board[0:-len(suffix)]
723 break
724 if mapped_board != afe_board:
725 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
726 return mapped_board
727
728
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an optional port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.  `servo_args` is
            `None` when no servo host could be determined, or when the
            servo port attribute cannot be converted to an int.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        # In a moblab SSP container a loopback servo host is replaced
        # with the configured host container IP.
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except (TypeError, ValueError):
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have just been reset to None because of a bad
        # port; only add the serial number when there is still a dict
        # to add it to.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
787
788
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to various
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo
        host must be created, and must be checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * If `try_servo_repair` is true, then create a servo host and
            check it with `repair()`.
          * Otherwise, if the servo responds to `ping` then create a
            servo host and check it with `verify()`.

    In cases where `servo_args` was not `None`, repair failure
    exceptions are passed back to the caller; otherwise, exceptions
    are logged and then discarded.  Note that this only happens in cases
    where we're called from a test (not special task) control file that
    has an explicit dependency on servo.  In that case, we require that
    repair not write to `status.log`, so as to avoid polluting test
    results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, failures from
                          `repair()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    # Whether the caller explicitly required a servo is decided from
    # the arguments as passed in, before any override from the DUT.
    servo_dependency = servo_args is not None
    is_in_lab = False

    if dut is not None and (try_lab_servo or servo_dependency):
        dut_servo_args, is_in_lab = _get_standard_servo_args(dut)
        if dut_servo_args is not None:
            servo_args = dut_servo_args

    if servo_args is None:
        return None

    # The ping pre-check applies only to the optional, verify-only case.
    if not (servo_dependency or try_servo_repair or
            servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None

    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        # Required servo: failures propagate to the caller.
        newhost.repair(silent=True)
        return newhost

    if try_servo_repair:
        operation = 'repair'
    else:
        operation = 'verification'
    try:
        if try_servo_repair:
            newhost.repair()
        else:
            newhost.verify()
    except Exception:
        # Optional servo: log the failure and still hand back the host.
        logging.exception('Servo %s failed for %s',
                          operation, newhost.hostname)
    return newhost