blob: 5e5b4742215db4afb791209a4634da132506acf3 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib.cros import retry
21from autotest_lib.server.cros.servo import servo
22from autotest_lib.server.hosts import ssh_host
23
24
class ServoHostException(error.AutoservError):
    """Common ancestor of every exception raised by ServoHost code.

    Catch this type to handle any servo-host specific failure.
    """
28
29
class ServoHostVerifyFailure(ServoHostException):
    """Signals that a servo host did not pass verification."""
33
34
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempt to repair a servo host has failed."""
38
39
def make_servo_hostname(dut_hostname):
    """Derive the hostname of a servo from the hostname of its DUT.

    '-servo' is appended to the first dot-separated label of the name;
    everything after the first dot (the domain part, if any) is kept
    unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields empty strings for the separator and the tail
    # when there is no dot, so a bare hostname only gains the suffix.
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
51
52
53class ServoHost(ssh_host.SSHHost):
54 """Host class for a host that controls a servo, e.g. beaglebone."""
55
56 # Timeout for getting the value of 'pwr_button'.
57 PWR_BUTTON_CMD_TIMEOUT_SECS = 15
58 # Timeout for rebooting servo host.
59 REBOOT_TIMEOUT_SECS = 90
60 HOST_DOWN_TIMEOUT_SECS = 60
61 # Delay after rebooting for servod to become fully functional.
62 REBOOT_DELAY_SECS = 20
63 # Servod process name.
64 SERVOD_PROCESS = 'servod'
65
66
67 def _initialize(self, servo_host='localhost', servo_port=9999,
68 *args, **dargs):
69 """Initialize a ServoHost instance.
70
71 A ServoHost instance represents a host that controls a servo.
72
73 @param servo_host: Name of the host where the servod process
74 is running.
75 @param servo_port: Port the servod process is listening on.
76
77 """
78 super(ServoHost, self)._initialize(hostname=servo_host,
79 *args, **dargs)
80 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
81 self._is_localhost = (self.hostname == 'localhost')
82 remote = 'http://%s:%s' % (self.hostname, servo_port)
83 self._servod_server = xmlrpclib.ServerProxy(remote)
84 # Commands on the servo host must be run by the superuser. Our account
85 # on Beaglebone is root, but locally we might be running as a
86 # different user. If so - `sudo ' will have to be added to the
87 # commands.
88 if self._is_localhost:
89 self._sudo_required = utils.system_output('id -u') != '0'
90 else:
91 self._sudo_required = False
92
93
94 def is_in_lab(self):
95 """Check whether the servo host is a lab device.
96
97 @returns: True if the servo host is in Cros Lab, otherwise False.
98
99 """
100 return self._is_in_lab
101
102
103 def is_localhost(self):
104 """Checks whether the servo host points to localhost.
105
106 @returns: True if it points to localhost, otherwise False.
107
108 """
109 return self._is_localhost
110
111
112 def get_servod_server_proxy(self):
113 """Return a proxy that can be used to communicate with servod server.
114
115 @returns: An xmlrpclib.ServerProxy that is connected to the servod
116 server on the host.
117
118 """
119 return self._servod_server
120
121
122 def get_wait_up_processes(self):
123 """Get the list of local processes to wait for in wait_up.
124
125 Override get_wait_up_processes in
126 autotest_lib.client.common_lib.hosts.base_classes.Host.
127 Wait for servod process to go up. Called by base class when
128 rebooting the device.
129
130 """
131 processes = [self.SERVOD_PROCESS]
132 return processes
133
134
135 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
136 connect_timeout=None, alive_interval=None):
137 """Override default make_ssh_command to use tuned options.
138
139 Tuning changes:
140 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
141 connection failure. Consistency with remote_access.py.
142
143 - ServerAliveInterval=180; which causes SSH to ping connection every
144 180 seconds. In conjunction with ServerAliveCountMax ensures
145 that if the connection dies, Autotest will bail out quickly.
146
147 - ServerAliveCountMax=3; consistency with remote_access.py.
148
149 - ConnectAttempts=4; reduce flakiness in connection errors;
150 consistency with remote_access.py.
151
152 - UserKnownHostsFile=/dev/null; we don't care about the keys.
153
154 - SSH protocol forced to 2; needed for ServerAliveInterval.
155
156 @param user User name to use for the ssh connection.
157 @param port Port on the target host to use for ssh connection.
158 @param opts Additional options to the ssh command.
159 @param hosts_file Ignored.
160 @param connect_timeout Ignored.
161 @param alive_interval Ignored.
162
163 @returns: An ssh command with the requested settings.
164
165 """
166 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
167 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
168 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
169 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
170 ' -o Protocol=2 -l %s -p %d')
171 return base_command % (opts, user, port)
172
173
174 def _make_scp_cmd(self, sources, dest):
175 """Format scp command.
176
177 Given a list of source paths and a destination path, produces the
178 appropriate scp command for encoding it. Remote paths must be
179 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
180 to allow additional ssh options.
181
182 @param sources: A list of source paths to copy from.
183 @param dest: Destination path to copy to.
184
185 @returns: An scp command that copies |sources| on local machine to
186 |dest| on the remote servo host.
187
188 """
189 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
190 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
191 return command % (self.master_ssh_option,
192 self.port, ' '.join(sources), dest)
193
194
195 def run(self, command, timeout=3600, ignore_status=False,
196 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
197 connect_timeout=30, options='', stdin=None, verbose=True, args=()):
198 """Run a command on the servo host.
199
200 Extends method `run` in SSHHost. If the servo host is a remote device,
201 it will call `run` in SSHost without changing anything.
202 If the servo host is 'localhost', it will call utils.system_output.
203
204 @param command: The command line string.
205 @param timeout: Time limit in seconds before attempting to
206 kill the running process. The run() function
207 will take a few seconds longer than 'timeout'
208 to complete if it has to kill the process.
209 @param ignore_status: Do not raise an exception, no matter
210 what the exit code of the command is.
211 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
212 @param connect_timeout: SSH connection timeout (in seconds)
213 Ignored if host is 'localhost'.
214 @param options: String with additional ssh command options
215 Ignored if host is 'localhost'.
216 @param stdin: Stdin to pass (a string) to the executed command.
217 @param verbose: Log the commands.
218 @param args: Sequence of strings to pass as arguments to command by
219 quoting them in " and escaping their contents if necessary.
220
221 @returns: A utils.CmdResult object.
222
223 @raises AutoservRunError if the command failed.
224 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
225 when servo host is not 'localhost'.
226
227 """
228 run_args = {'command': command, 'timeout': timeout,
229 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
230 'stderr_tee': stderr_tee, 'stdin': stdin,
231 'verbose': verbose, 'args': args}
232 if self.is_localhost():
233 if self._sudo_required:
234 run_args['command'] = 'sudo -n %s' % command
235 try:
236 return utils.run(**run_args)
237 except error.CmdError as e:
238 logging.error(e)
239 raise error.AutoservRunError('command execution error',
240 e.result_obj)
241 else:
242 run_args['connect_timeout'] = connect_timeout
243 run_args['options'] = options
244 return super(ServoHost, self).run(**run_args)
245
246
247 def _check_servod(self):
248 """A sanity check of the servod state."""
249 msg_prefix = 'Servod error: %s'
250 error_msg = None
251 try:
252 timeout, _ = retry.timeout(
253 self._servod_server.get, args=('pwr_button', ),
254 timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
255 if timeout:
256 error_msg = msg_prefix % 'Request timed out.'
257 except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
258 error_msg = msg_prefix % e
259 if error_msg:
260 raise ServoHostVerifyFailure(error_msg)
261
262
263 def _check_servo_host_usb(self):
264 """A sanity check of the USB device.
265
266 Sometimes the usb gets wedged due to a kernel bug on the beaglebone.
267 A symptom is the presence of /dev/sda without /dev/sda1. The check
268 here ensures that if /dev/sda exists, /dev/sda1 must also exist.
269 See crbug.com/225932.
270
271 @raises ServoHostVerifyFailure if /dev/sda exists without /dev/sda1 on
272 the beaglebone.
273
274 """
275 try:
276 # The following test exits with a non-zero code
277 # and raises AutoserverRunError if error is detected.
278 self.run('test ! -b /dev/sda -o -b /dev/sda1')
279 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
280 raise ServoHostVerifyFailure(
281 'USB sanity check on %s failed: %s' % (self.hostname, e))
282
283
284 def verify_software(self):
285 """Verify that the servo is in a good state.
286
287 It overrides the base class function for verify_software.
288 It checks:
289 1) Whether basic servo command can run successfully.
290 2) Whether USB is in a good state. crbug.com/225932
291
292 @raises ServoHostVerifyFailure if servo host does not pass the checks.
293
294 """
295 logging.info('Verifying servo host %s with sanity checks.',
296 self.hostname)
297 self._check_servod()
298 self._check_servo_host_usb()
299 logging.info('Sanity checks pass on servo host %s', self.hostname)
300
301
302 def _repair_with_sysrq_reboot(self):
303 """Reboot with magic SysRq key."""
304 self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
305 down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
306 reboot_cmd='echo "b" > /proc/sysrq-trigger &',
307 fastsync=True)
308 time.sleep(self.REBOOT_DELAY_SECS)
309
310
311 def _powercycle_to_repair(self):
312 """Power cycle the servo host using POE."""
313 logging.info('powercycle_to_repair has not been implemented yet.')
314 # TODO(fdeng): implement this method. crbug.com/278602
315
316
317 def repair_full(self):
318 """Attempt to repair servo host.
319
320 This overrides the base class function for repair.
321 Note if the host is not in Cros Lab, the repair procedure
322 will be skipped.
323
324 @raises ServoHostRepairTotalFailure if all attempts fail.
325
326 """
327 if not self.is_in_lab():
328 logging.warn('Skip repairing servo host %s: Not a lab device.',
329 self.hostname)
330 return
331 logging.info('Attempting to repair servo host %s.', self.hostname)
332 repair_funcs = [self._repair_with_sysrq_reboot]
333 errors = []
334 for repair_func in repair_funcs:
335 try:
336 repair_func()
337 self.verify()
338 return
339 except Exception as e:
340 logging.warn('Failed to repair servo: %s', e)
341 errors.append(str(e))
342 raise ServoHostRepairTotalFailure(
343 'All attempts at repairing the servo failed:\n%s' %
344 '\n'.join(errors))
345
346
347 def create_healthy_servo_object(self):
348 """Create a servo.Servo object.
349
350 Create a servo.Servo object. If the servo host is in Cros Lab,
351 this method will first verify the servo host and attempt to repair it if
352 error is detected.
353
354 @raises ServoHostRepairTotalFailure if it fails to fix the servo host.
355 @raises AutoservSshPermissionDeniedError if the DUT is not ssh-able
356 due to permission error.
357
358 """
359 if self.is_in_lab():
360 try:
361 self.verify()
362 except (error.AutoservSSHTimeout,
363 error.AutoservSshPingHostError,
364 error.AutoservHostIsShuttingDownError,
365 ServoHostVerifyFailure):
366 self.repair_full()
367 except error.AutoservSshPermissionDeniedError:
368 raise
369 return servo.Servo(servo_host=self)