blob: 8fe59fe9f34e4005ac2e574b6dfcb21d894d7362 [file] [log] [blame]
# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
7
8
9"""This is a base host class for servohost and labstation."""
10
11
12import httplib
13import logging
14import socket
15import xmlrpclib
16
17from autotest_lib.client.bin import utils
Garry Wang358aad42020-08-02 20:56:04 -070018from autotest_lib.client.common_lib import enum
Garry Wangebc015b2019-06-06 17:45:06 -070019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import hosts
21from autotest_lib.client.common_lib import lsbrelease_utils
22from autotest_lib.client.common_lib.cros import dev_server
Garry Wang358aad42020-08-02 20:56:04 -070023from autotest_lib.client.common_lib.cros import kernel_utils
Garry Wangebc015b2019-06-06 17:45:06 -070024from autotest_lib.client.cros import constants as client_constants
Garry Wang358aad42020-08-02 20:56:04 -070025from autotest_lib.server import autotest
Garry Wangebc015b2019-06-06 17:45:06 -070026from autotest_lib.server import site_utils as server_utils
27from autotest_lib.server.cros import autoupdater
28from autotest_lib.server.hosts import ssh_host
29from autotest_lib.site_utils.rpm_control_system import rpm_client
30
Garry Wangebc015b2019-06-06 17:45:06 -070031
class BaseServoHost(ssh_host.SSHHost):
    """Base host class for a host that manages servo(s).

    E.g. beaglebone, labstation.
    """
    # Reboot after a 5 second delay, then issue `reboot -f` 10 seconds later
    # (presumably as a fallback in case the regular reboot hangs).
    REBOOT_CMD = 'sleep 5; reboot & sleep 10; reboot -f'

    TEMP_FILE_DIR = '/var/lib/servod/'

    LOCK_FILE_POSTFIX = '_in_use'
    REBOOT_FILE_POSTFIX = '_reboot'

    # Time to wait for a rebooting servohost, in seconds.
    REBOOT_TIMEOUT = 240

    # Timeout value to power cycle a servohost, in seconds.
    BOOT_TIMEOUT = 240

    # Constants that reflect current host update state.
    UPDATE_STATE = enum.Enum('IDLE', 'RUNNING', 'PENDING_REBOOT')

    def _initialize(self, hostname, is_in_lab=None, *args, **dargs):
        """Construct a BaseServoHost object.

        @param hostname: Hostname of the servo host. 'localhost' is handled
                         specially: it is never considered to be in the lab
                         and commands may need to be run through `sudo`.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.
        @param args: Extra positional arguments for the parent class.
        @param dargs: Extra keyword arguments for the parent class.

        """
        super(BaseServoHost, self)._initialize(hostname=hostname,
                                               *args, **dargs)
        self._is_localhost = (self.hostname == 'localhost')
        if self._is_localhost:
            self._is_in_lab = False
        elif is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab

        # Commands on the servo host must be run by the superuser.
        # Our account on a remote host is root, but if our target is
        # localhost then we might be running unprivileged.  If so,
        # `sudo` will have to be added to the commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False

        # Lazily computed by is_labstation().
        self._is_labstation = None
        self._dut_host_info = None
        self._dut_hostname = None


    def get_board(self):
        """Determine the board for this servo host. E.g. fizz-labstation

        @returns a string representing this labstation's board or None if
                 target host is not using a ChromeOS image(e.g. test in
                 chroot).
        """
        # ignore_status: a missing lsb-release file must not raise here; the
        # (possibly empty) output is handed to the parser as-is.
        output = self.run('cat /etc/lsb-release', ignore_status=True).stdout
        return lsbrelease_utils.get_current_board(lsb_release_content=output)


    def set_dut_host_info(self, dut_host_info):
        """Remember the host info of the DUT attached to this servo host.

        @param dut_host_info: A HostInfo object.
        """
        logging.info('setting dut_host_info field to (%s)', dut_host_info)
        self._dut_host_info = dut_host_info


    def get_dut_host_info(self):
        """Return the DUT host info previously set, or None if never set.

        @return A HostInfo object.
        """
        return self._dut_host_info


    def set_dut_hostname(self, dut_hostname):
        """Remember the hostname of the DUT attached to this servo host.

        @param dut_hostname: hostname of the DUT that connected to this servo.
        """
        logging.info('setting dut_hostname as (%s)', dut_hostname)
        self._dut_hostname = dut_hostname


    def get_dut_hostname(self):
        """Return the DUT hostname previously set, or None if never set.

        @returns hostname of the DUT that connected to this servo.
        """
        return self._dut_hostname


    def is_labstation(self):
        """Determine if the host is a labstation.

        The answer is computed from the board name on first use and cached.

        @returns True if this host is a labstation, otherwise False.
        """
        if self._is_labstation is None:
            board = self.get_board()
            self._is_labstation = board is not None and 'labstation' in board

        return self._is_labstation


    def _get_lsb_release_content(self):
        """Return the content of lsb-release file of host."""
        return self.run(
            'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION(e.g. 12900.0.0). None on fail.
        """
        return lsbrelease_utils.get_chromeos_release_version(
            lsb_release_content=self._get_lsb_release_content()
        )


    def get_full_release_path(self):
        """Get full release path from servohost as string.

        @returns full release path as a string
                 (e.g. fizz-labstation-release/R82.12900.0.0). None on fail.
        """
        return lsbrelease_utils.get_chromeos_release_builder_path(
            lsb_release_content=self._get_lsb_release_content()
        )


    def _check_update_status(self):
        """Check servohost's current update state.

        @returns: one of the states from self.UPDATE_STATE
            IDLE -- if the target host is not currently updating and not
                pending on a reboot.
            RUNNING -- if there is another updating process running on the
                target host(note: we don't expect to hit this scenario).
            PENDING_REBOOT -- if the target host had an update and is pending
                on reboot.
        """
        # `pgrep -f` also matches the shell running this very command line,
        # so its own PID ($$) is filtered out.  -x makes grep compare whole
        # lines; a plain substring match would also drop any unrelated PID
        # that merely contains the shell's PID (e.g. 1234 when $$ is 123).
        result = self.run('pgrep -f quick-provision | grep -vx $$',
                          ignore_status=True)
        # We don't expect any output unless another quick provision process
        # is running.
        if result.exit_status == 0:
            return self.UPDATE_STATE.RUNNING

        # Determine if we have an update pending on reboot by checking if
        # the current inactive kernel has priority for the next boot.
        try:
            inactive_kernel = kernel_utils.get_kernel_state(self)[1]
            next_kernel = kernel_utils.get_next_kernel(self)
            if inactive_kernel == next_kernel:
                return self.UPDATE_STATE.PENDING_REBOOT
        except Exception as e:
            # Best effort: a failure to read kernel info is logged and the
            # host is reported as IDLE.
            logging.error('Unexpected error while checking kernel info; %s', e)
        return self.UPDATE_STATE.IDLE


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def prepare_for_update(self):
        """Prepares the DUT for an update.

        Subclasses may override this to perform any special actions
        required before updating.
        """
        pass


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command.

        @param args: Positional arguments forwarded to the parent reboot().
        @param dargs: Keyword arguments forwarded to the parent reboot().
        """
        super(BaseServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                          *args, **dargs)


    def update_image(self, stable_version=None):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If this process is running on moblab, do nothing.
          * If the host already runs the target version, do nothing.
          * If the host is processing an update, do nothing.
          * If the host has an update that is pending on reboot, do nothing
            (the reboot is handled elsewhere).
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, start an update.

        @param stable_version: the target build number (e.g. R82-12900.0.0).
                               When not given, the value is read from the
                               `servo_cros_stable_version` attribute of the
                               DUT host info.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        if not stable_version:
            logging.debug("BaseServoHost::update_image attempting to get"
                          " servo cros stable version")
            try:
                stable_version = (self.get_dut_host_info().
                                  servo_cros_stable_version)
            except AttributeError:
                # Raised when no DUT host info was set (None) or when it
                # lacks the attribute; stable_version keeps its falsy value.
                logging.error("BaseServoHost::update_image failed to get"
                              " servo cros stable version.")

        target_build = "%s-release/%s" % (self.get_board(), stable_version)
        # Element 3 of the parsed build name is used as the build number.
        target_build_number = server_utils.ParseBuildName(
            target_build)[3]
        current_build_number = self.get_release_version()

        if current_build_number == target_build_number:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            return

        status = self._check_update_status()
        if status == self.UPDATE_STATE.RUNNING:
            logging.info('servo host %s already processing an update',
                         self.hostname)
            return
        if status == self.UPDATE_STATE.PENDING_REBOOT:
            # Labstation reboot is handled separately here as it requires
            # synchronized reboot among all managed DUTs. For servo_v3, we'll
            # reboot when initializing Servohost, if there is an update
            # pending.
            logging.info('An update has been completed and pending reboot.')
            return

        ds = dev_server.ImageServer.resolve(self.hostname,
                                            hostname=self.hostname)
        url = ds.get_update_url(target_build)
        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self,
                                                use_quick_provision=True,
                                                is_servohost=True)
        logging.info('Using devserver url: %s to trigger update on '
                     'servo host %s, from %s to %s', url, self.hostname,
                     current_build_number, target_build_number)
        updater.run_update()


    def has_power(self):
        """Return whether or not the servo host is powered by PoE or RPM."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def _post_update_reboot(self):
        """Reboot servohost after a quick provision.

        We need to do some special cleanup before and after reboot
        when there is an update pending.
        """
        # Regarding the 'crossystem' command below: In some cases,
        # the update flow puts the TPM into a state such that it
        # fails verification.  We don't know why.  However, this
        # call papers over the problem by clearing the TPM during
        # the reboot.
        #
        # We ignore failures from 'crossystem'.  Although failure
        # here is unexpected, and could signal a bug, the point of
        # the exercise is to paper over problems; allowing this to
        # fail would defeat the purpose.
        self.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
        self._servo_host_reboot()
        logging.debug('Cleaning up autotest directories if exist.')
        try:
            installed_autodir = autotest.Autotest.get_installed_autodir(self)
            # Quote the path so that whitespace or shell metacharacters in
            # it cannot make `rm -rf` act on unintended targets.
            self.run('rm -rf "%s"' % utils.sh_escape(installed_autodir))
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')


    def power_cycle(self):
        """Cycle power to this host via PoE(servo v3) or RPM(labstation)
        if it is a lab device.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                    'Power cycling %s failed: %s' % (self.hostname, e),
                    'power_cycle_via_rpm_failed'
                )
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _servo_host_reboot(self):
        """Reboot this servo host because a reboot is requested.

        @raises AutoservHostError if the host fails to shut down or fails
                to come back within REBOOT_TIMEOUT seconds.
        """
        logging.info('Rebooting servo host %s from build %s', self.hostname,
                     self.get_release_version())
        # Capture the boot id before the reboot is requested, so that the
        # value is guaranteed to belong to the boot we are leaving and does
        # not depend on the initial delay built into REBOOT_CMD.
        old_boot_id = self.get_boot_id()

        # Tell the reboot() call not to wait for completion.
        # Otherwise, the call will log reboot failure if servo does
        # not come back.  The logged reboot failure will lead to
        # test job failure.  If the test does not require servo, we
        # don't want servo failure to fail the test with error:
        # `Host did not return from reboot` in status.log.
        self.reboot(fastsync=True, wait=False)

        # We told the reboot() call not to wait, but we need to wait
        # for the reboot before we continue.  Alas.  The code from
        # here below is basically a copy of Host.wait_for_restart(),
        # with the logging bits ripped out, so that they can't cause
        # the failure logging problem described above.
        #
        # The black stain that this has left on my soul can never be
        # erased.
        if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                              warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                              old_boot_id=old_boot_id):
            raise error.AutoservHostError(
                'servo host %s failed to shut down.' %
                self.hostname)
        if self.wait_up(timeout=self.REBOOT_TIMEOUT):
            logging.info('servo host %s back from reboot, with build %s',
                         self.hostname, self.get_release_version())
        else:
            raise error.AutoservHostError(
                'servo host %s failed to come back from reboot.' %
                self.hostname)


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None,
                         alive_count_max=None, connection_attempts=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.
        @param alive_count_max Ignored.
        @param connection_attempts Ignored.

        @returns: An ssh command with the requested settings.

        """
        options = ' '.join([opts, '-o Protocol=2'])
        return super(BaseServoHost, self).make_ssh_command(
            user=user, port=port, opts=options, hosts_file='/dev/null',
            connect_timeout=30, alive_interval=180, alive_count_max=3,
            connection_attempts=4)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given the source path(s) and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: Source path(s) to copy from; either a string that is
                        already space separated, or a list/tuple of paths
                        which will be joined with spaces.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| to |dest|.

        """
        # A list interpolated with %s would render as its Python repr
        # (brackets, quotes and commas), which is not a valid scp argument.
        if isinstance(sources, (list, tuple)):
            sources = ' '.join(sources)
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self._master_ssh.ssh_option,
                          self.port, sources, dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, wrapping
        the command in `sudo -n sh -c` when superuser rights are required.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here)
                                     Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if
                     necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        # Arguments common to both the local and the remote code path.
        run_args = {
            'command'                 : command,
            'timeout'                 : timeout,
            'ignore_status'           : ignore_status,
            'stdout_tee'              : stdout_tee,
            'stderr_tee'              : stderr_tee,
            # connect_timeout         n/a for localhost
            # options                 n/a for localhost
            # ssh_failure_retry_ok    n/a for localhost
            'stdin'                   : stdin,
            'verbose'                 : verbose,
            'args'                    : args,
        }
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                    command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local error into the exception type that
                # callers of a remote run() expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            run_args['ssh_failure_retry_ok'] = ssh_failure_retry_ok
            return super(BaseServoHost, self).run(**run_args)