blob: 11af12d10da762e5ce90039504b6ffbc16caecbc [file] [log] [blame]
Garry Wangebc015b2019-06-06 17:45:06 -07001# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This is a base host class for servohost and labstation."""
10
11
12import httplib
13import logging
14import socket
15import xmlrpclib
16
17from autotest_lib.client.bin import utils
Garry Wang358aad42020-08-02 20:56:04 -070018from autotest_lib.client.common_lib import enum
Garry Wangebc015b2019-06-06 17:45:06 -070019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import hosts
21from autotest_lib.client.common_lib import lsbrelease_utils
22from autotest_lib.client.common_lib.cros import dev_server
Garry Wang358aad42020-08-02 20:56:04 -070023from autotest_lib.client.common_lib.cros import kernel_utils
Garry Wangebc015b2019-06-06 17:45:06 -070024from autotest_lib.client.cros import constants as client_constants
Garry Wang358aad42020-08-02 20:56:04 -070025from autotest_lib.server import autotest
Garry Wangebc015b2019-06-06 17:45:06 -070026from autotest_lib.server import site_utils as server_utils
27from autotest_lib.server.cros import autoupdater
28from autotest_lib.server.hosts import ssh_host
29from autotest_lib.site_utils.rpm_control_system import rpm_client
30
Garry Wangebc015b2019-06-06 17:45:06 -070031
class BaseServoHost(ssh_host.SSHHost):
    """Base host class for a host that manages servo(s).

    E.g. beaglebone, labstation.
    """
    # Shell command handed to the parent reboot(): after a short delay it
    # issues a regular reboot in the background and, 10 seconds later, a
    # forced reboot as a fallback.
    REBOOT_CMD = 'sleep 5; reboot & sleep 10; reboot -f'

    # NOTE(review): presumably the directory (and file-name suffixes below)
    # used for per-servo marker files; nothing in this class reads them, so
    # confirm against subclasses/callers.
    TEMP_FILE_DIR = '/var/lib/servod/'

    LOCK_FILE_POSTFIX = '_in_use'
    REBOOT_FILE_POSTFIX = '_reboot'

    # Time to wait for a rebooting servohost, in seconds.
    REBOOT_TIMEOUT = 240

    # Timeout value to power cycle a servohost, in seconds.
    BOOT_TIMEOUT = 240

    # Constants that reflect current host update state.
    UPDATE_STATE = enum.Enum('IDLE', 'RUNNING', 'PENDING_REBOOT')
Garry Wangebc015b2019-06-06 17:45:06 -070051
def _initialize(self, hostname, is_in_lab=None, *args, **dargs):
    """Construct a BaseServoHost object.

    @param hostname: Name of the servo host; forwarded to the parent
                     class's _initialize().
    @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                      to None, for which utils.host_is_in_lab_zone will be
                      called to check if the servo host is in Cros lab.
                      Ignored when the host is 'localhost'.
    @param args: Extra positional arguments for the parent initializer.
    @param dargs: Extra keyword arguments for the parent initializer.

    """
    super(BaseServoHost, self)._initialize(hostname=hostname,
                                           *args, **dargs)
    targets_localhost = (self.hostname == 'localhost')
    self._is_localhost = targets_localhost

    if targets_localhost:
        # A local servo host is never treated as a lab device.
        self._is_in_lab = False
        # Commands on the servo host must be run by the superuser.
        # When the target is localhost we might be running unprivileged,
        # in which case `sudo` has to be added to the commands.
        self._sudo_required = utils.system_output('id -u') != '0'
    else:
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        # Our account on a remote host is root, so no sudo is needed.
        self._sudo_required = False

    # Lazily computed by is_labstation().
    self._is_labstation = None
    # Filled in later through set_dut_host_info()/set_dut_hostname().
    self._dut_host_info = None
    self._dut_hostname = None
Garry Wangebc015b2019-06-06 17:45:06 -070082
83
def get_board(self):
    """Determine the board for this servo host. E.g. fizz-labstation

    The board is parsed out of /etc/lsb-release as read from the host;
    a failing `cat` is tolerated (ignore_status=True).

    @returns a string representing this labstation's board or None if
             target host is not using a ChromeOS image(e.g. test in chroot).
    """
    lsb_release = self.run('cat /etc/lsb-release', ignore_status=True)
    return lsbrelease_utils.get_current_board(
            lsb_release_content=lsb_release.stdout)
Garry Wangebc015b2019-06-06 17:45:06 -070092
93
def set_dut_host_info(self, dut_host_info):
    """Remember the host info of the DUT and log the new value.

    @param dut_host_info: A HostInfo object.
    """
    logging.info('setting dut_host_info field to (%s)', dut_host_info)
    self._dut_host_info = dut_host_info
100
101
def get_dut_host_info(self):
    """Return the DUT host info stored by set_dut_host_info().

    @return A HostInfo object (None until one has been set).
    """
    return self._dut_host_info
Gregory Nisbet8e2fbb22019-12-05 11:36:37 -0800107
108
def set_dut_hostname(self, dut_hostname):
    """Remember the hostname of the attached DUT and log the new value.

    @param dut_hostname: hostname of the DUT that is connected to this
                         servo.
    """
    logging.info('setting dut_hostname as (%s)', dut_hostname)
    self._dut_hostname = dut_hostname
115
116
def get_dut_hostname(self):
    """Return the DUT hostname stored by set_dut_hostname().

    @returns hostname of the DUT that is connected to this servo
             (None until one has been set).
    """
    return self._dut_hostname
122
123
def is_labstation(self):
    """Determine if the host is a labstation.

    The answer is derived from the board name once and then cached on
    the instance, so get_board() is queried at most one time.

    @returns True if the host is a labstation, otherwise False.
    """
    if self._is_labstation is not None:
        return self._is_labstation

    board_name = self.get_board()
    if board_name is None:
        # No board information: cannot be a labstation.
        self._is_labstation = False
    else:
        self._is_labstation = 'labstation' in board_name
    return self._is_labstation
134
135
def _get_lsb_release_content(self):
    """Return the whitespace-stripped content of the host's lsb-release file.

    The file path comes from client_constants.LSB_RELEASE. The command is
    run without ignore_status, so a failing `cat` surfaces as whatever
    self.run() raises.
    """
    cat_command = 'cat "%s"' % client_constants.LSB_RELEASE
    cat_result = self.run(cat_command)
    return cat_result.stdout.strip()
140
141
def get_release_version(self):
    """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

    @returns The version string in lsb-release, under attribute
             CHROMEOS_RELEASE_VERSION(e.g. 12900.0.0). None on fail.
    """
    lsb_content = self._get_lsb_release_content()
    return lsbrelease_utils.get_chromeos_release_version(
            lsb_release_content=lsb_content)
151
152
def get_full_release_path(self):
    """Get full release path from servohost as string.

    @returns full release path as a string
             (e.g. fizz-labstation-release/R82.12900.0.0). None on fail.
    """
    lsb_content = self._get_lsb_release_content()
    return lsbrelease_utils.get_chromeos_release_builder_path(
            lsb_release_content=lsb_content)
Garry Wangebc015b2019-06-06 17:45:06 -0700162
163
164 def _check_update_status(self):
Garry Wang358aad42020-08-02 20:56:04 -0700165 """ Check servohost's current update state.
166
167 @returns: one of below state of from self.UPDATE_STATE
168 IDLE -- if the target host is not currently updating and not
169 pending on a reboot.
170 RUNNING -- if there is another updating process that running on
171 target host(note: we don't expect to hit this scenario).
172 PENDING_REBOOT -- if the target host had an update and pending
173 on reboot.
174 """
175 result = self.run('pgrep -f quick-provision | grep -v $$',
176 ignore_status=True)
177 # We don't expect any output unless there are another quick
178 # provision process is running.
179 if result.exit_status == 0:
180 return self.UPDATE_STATE.RUNNING
181
182 # Determine if we have an update that pending on reboot by check if
183 # the current inactive kernel has priority for the next boot.
184 try:
185 inactive_kernel = kernel_utils.get_kernel_state(self)[1]
186 next_kernel = kernel_utils.get_next_kernel(self)
187 if inactive_kernel == next_kernel:
188 return self.UPDATE_STATE.PENDING_REBOOT
189 except Exception as e:
190 logging.error('Unexpected error while checking kernel info; %s', e)
191 return self.UPDATE_STATE.IDLE
Garry Wangebc015b2019-06-06 17:45:06 -0700192
193
def is_in_lab(self):
    """Check whether the servo host is a lab device.

    The value is decided once in _initialize().

    @returns: True if the servo host is in Cros Lab, otherwise False.

    """
    return self._is_in_lab
201
202
def is_localhost(self):
    """Check whether the servo host points to localhost.

    The value is decided once in _initialize().

    @returns: True if it points to localhost, otherwise False.

    """
    return self._is_localhost
210
211
def is_cros_host(self):
    """Check if a servo host is running chromeos.

    The check greps /etc/lsb-release on the host for 'CHROMEOS' with a
    10 second timeout.

    @return: True if the servo host is running chromeos.
             False if it isn't, or we don't have enough information
             (the command could not be run or the SSH connection
             timed out).
    """
    try:
        probe = self.run('grep -q CHROMEOS /etc/lsb-release',
                         ignore_status=True, timeout=10)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        return False
    else:
        return probe.exit_status == 0
224
225
def prepare_for_update(self):
    """Prepare the host for an update.

    The base implementation intentionally does nothing. Subclasses may
    override this to perform any special actions required before
    updating.
    """
    return None
232
233
def reboot(self, *args, **dargs):
    """Reboot using the special servo host reboot command.

    Delegates to the parent class's reboot(), forcing reboot_cmd to
    self.REBOOT_CMD. The parent's return value is not propagated.

    @param args: Positional arguments forwarded to the parent reboot().
    @param dargs: Keyword arguments forwarded to the parent reboot();
                  must not contain `reboot_cmd`.
    """
    parent = super(BaseServoHost, self)
    parent.reboot(reboot_cmd=self.REBOOT_CMD, *args, **dargs)
238
239
def update_image(self, stable_version=None):
    """Update the image on the servo host, if needed.

    This method recognizes the following cases:
      * If the host is not running Chrome OS, do nothing.
      * If lsbrelease_utils.is_moblab() reports moblab, do nothing.
      * If the host already runs the target version, do nothing.
      * If the host is processing an update, do nothing.
      * If the host has an update that is pending on reboot, do nothing
        (the reboot itself is not performed here).
      * Otherwise, i.e. the host is running a version of Chrome OS
        different from the target one, start an update.

    @param stable_version: the target build number (e.g. R82-12900.0.0).
                           When empty, the value is looked up from
                           servo_cros_stable_version of the DUT host info.

    @raises dev_server.DevServerException: If all the devservers are down.
    @raises site_utils.ParseBuildNameException: If the devserver returns
                                                an invalid build name.
    """
    # servod could be running in a Ubuntu workstation.
    if not self.is_cros_host():
        logging.info('Not attempting an update, either %s is not running '
                     'chromeos or we cannot find enough information about '
                     'the host.', self.hostname)
        return

    if lsbrelease_utils.is_moblab():
        logging.info('Not attempting an update, %s is running moblab.',
                     self.hostname)
        return

    if not stable_version:
        logging.debug("BaseServoHost::update_image attempting to get"
                      " servo cros stable version")
        try:
            dut_info = self.get_dut_host_info()
            stable_version = dut_info.servo_cros_stable_version
        except AttributeError:
            # No DUT host info (or no such field): the failure is only
            # logged and the flow continues with the empty version.
            logging.error("BaseServoHost::update_image failed to get"
                          " servo cros stable version.")

    build_path = "%s-release/%s" % (self.get_board(), stable_version)
    wanted_version = server_utils.ParseBuildName(build_path)[3]
    running_version = self.get_release_version()

    if running_version == wanted_version:
        logging.info('servo host %s does not require an update.',
                     self.hostname)
        return

    update_state = self._check_update_status()
    if update_state == self.UPDATE_STATE.RUNNING:
        logging.info('servo host %s already processing an update',
                     self.hostname)
        return
    if update_state == self.UPDATE_STATE.PENDING_REBOOT:
        # Labstation reboot is handled separately as it requires a
        # synchronized reboot among all managed DUTs. For servo_v3, the
        # reboot happens when the Servohost is initialized, if there is
        # an update pending.
        logging.info('An update has been completed and pending reboot.')
        return

    image_server = dev_server.ImageServer.resolve(self.hostname,
                                                  hostname=self.hostname)
    update_url = image_server.get_update_url(build_path)
    image_updater = autoupdater.ChromiumOSUpdater(update_url=update_url,
                                                  host=self,
                                                  is_servohost=True)
    logging.info('Using devserver url: %s to trigger update on '
                 'servo host %s, from %s to %s', update_url, self.hostname,
                 running_version, wanted_version)
    image_updater.run_update()
Garry Wangebc015b2019-06-06 17:45:06 -0700311
312
def has_power(self):
    """Return whether or not the servo host is powered by PoE or RPM.

    @returns: the result of is_in_lab(); see the TODO below.
    """
    # TODO(fdeng): See crbug.com/302791
    # For now, assume all servo hosts in the lab have power.
    in_lab = self.is_in_lab()
    return in_lab
318
319
def _post_update_reboot(self):
    """Reboot servohost after a quick provision.

    We need to do some special cleanup before and after the reboot
    when there is an update pending: request a TPM owner clear before
    rebooting, and remove any installed autotest directory afterwards.
    """
    # Regarding the 'crossystem' command below: In some cases,
    # the update flow puts the TPM into a state such that it
    # fails verification. We don't know why. However, this
    # call papers over the problem by clearing the TPM during
    # the reboot.
    #
    # We ignore failures from 'crossystem'. Although failure
    # here is unexpected, and could signal a bug, the point of
    # the exercise is to paper over problems; allowing this to
    # fail would defeat the purpose.
    self.run('crossystem clear_tpm_owner_request=1', ignore_status=True)

    self._servo_host_reboot()

    logging.debug('Cleaning up autotest directories if exist.')
    try:
        autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + autodir)
    except autotest.AutodirNotFoundError:
        # Nothing installed, so nothing to clean up.
        logging.debug('No autotest installed directory found.')
344
345
def power_cycle(self):
    """Cycle power to this host via PoE(servo v3) or RPM(labstation)
    if it is a lab device.

    Hosts for which has_power() is False are skipped with a log message.

    @raises AutoservRepairError if it fails to power cycle the
            servo host.

    """
    if not self.has_power():
        logging.info('Skipping power cycling, not a lab device.')
        return

    try:
        rpm_client.set_power(self, 'CYCLE')
    except (socket.error, xmlrpclib.Error,
            httplib.BadStatusLine,
            rpm_client.RemotePowerException) as e:
        # Translate transport/RPM failures into a repair error.
        failure = 'Power cycling %s failed: %s' % (self.hostname, e)
        raise hosts.AutoservRepairError(failure,
                                        'power_cycle_via_rpm_failed')
366
367
368 def _servo_host_reboot(self):
369 """Reboot this servo host because a reboot is requested."""
370 logging.info('Rebooting servo host %s from build %s', self.hostname,
Garry Wang14831832020-03-04 17:21:49 -0800371 self.get_release_version())
Garry Wangebc015b2019-06-06 17:45:06 -0700372 # Tell the reboot() call not to wait for completion.
373 # Otherwise, the call will log reboot failure if servo does
374 # not come back. The logged reboot failure will lead to
375 # test job failure. If the test does not require servo, we
376 # don't want servo failure to fail the test with error:
377 # `Host did not return from reboot` in status.log.
378 self.reboot(fastsync=True, wait=False)
379
380 # We told the reboot() call not to wait, but we need to wait
381 # for the reboot before we continue. Alas. The code from
382 # here below is basically a copy of Host.wait_for_restart(),
383 # with the logging bits ripped out, so that they can't cause
384 # the failure logging problem described above.
385 #
386 # The black stain that this has left on my soul can never be
387 # erased.
388 old_boot_id = self.get_boot_id()
389 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
390 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
391 old_boot_id=old_boot_id):
392 raise error.AutoservHostError(
393 'servo host %s failed to shut down.' %
394 self.hostname)
Garry Wang79e9af62019-06-12 15:19:19 -0700395 if self.wait_up(timeout=self.REBOOT_TIMEOUT):
Garry Wangebc015b2019-06-06 17:45:06 -0700396 logging.info('servo host %s back from reboot, with build %s',
Garry Wang14831832020-03-04 17:21:49 -0800397 self.hostname, self.get_release_version())
Garry Wangebc015b2019-06-06 17:45:06 -0700398 else:
399 raise error.AutoservHostError(
400 'servo host %s failed to come back from reboot.' %
401 self.hostname)
402
403
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None,
                     alive_count_max=None, connection_attempts=None):
    """Override default make_ssh_command to use tuned options.

    Only `user`, `port` and `opts` are honored; every other argument is
    replaced by the fixed values below before delegating to the parent
    class.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure. Consistency with remote_access.py.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.

      - ServerAliveCountMax=3; consistency with remote_access.py.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.py.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @param alive_count_max Ignored.
    @param connection_attempts Ignored.

    @returns: An ssh command with the requested settings.

    """
    tuned = {
        'user': user,
        'port': port,
        # Caller-supplied options come first, then the forced protocol.
        'opts': ' '.join((opts, '-o Protocol=2')),
        'hosts_file': '/dev/null',
        'connect_timeout': 30,
        'alive_interval': 180,
        'alive_count_max': 3,
        'connection_attempts': 4,
    }
    return super(BaseServoHost, self).make_ssh_command(**tuned)
443
444
445 def _make_scp_cmd(self, sources, dest):
446 """Format scp command.
447
448 Given a list of source paths and a destination path, produces the
449 appropriate scp command for encoding it. Remote paths must be
450 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
451 to allow additional ssh options.
452
453 @param sources: A list of source paths to copy from.
454 @param dest: Destination path to copy to.
455
456 @returns: An scp command that copies |sources| on local machine to
457 |dest| on the remote servo host.
458
459 """
460 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
461 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
462 return command % (self._master_ssh.ssh_option,
463 self.port, sources, dest)
464
465
def run(self, command, timeout=3600, ignore_status=False,
        stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
        connect_timeout=30, ssh_failure_retry_ok=False,
        options='', stdin=None, verbose=True, args=()):
    """Run a command on the servo host.

    Extends method `run` in SSHHost. If the servo host is a remote device,
    it will call `run` in SSHHost without changing anything.
    If the servo host is 'localhost', it will call utils.run, wrapping
    the command in `sudo -n sh -c` when superuser rights are required.

    @param command: The command line string.
    @param timeout: Time limit in seconds before attempting to
                    kill the running process. The run() function
                    will take a few seconds longer than 'timeout'
                    to complete if it has to kill the process.
    @param ignore_status: Do not raise an exception, no matter
                          what the exit code of the command is.
    @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
    @param connect_timeout: SSH connection timeout (in seconds)
                            Ignored if host is 'localhost'.
    @param options: String with additional ssh command options
                    Ignored if host is 'localhost'.
    @param ssh_failure_retry_ok: when True and ssh connection failure is
                                 suspected, OK to retry command (but not
                                 compulsory, and likely not needed here)
                                 Ignored if host is 'localhost'.
    @param stdin: Stdin to pass (a string) to the executed command.
    @param verbose: Log the commands.
    @param args: Sequence of strings to pass as arguments to command by
                 quoting them in " and escaping their contents if necessary.

    @returns: A utils.CmdResult object.

    @raises AutoservRunError if the command failed.
    @raises AutoservSSHTimeout SSH connection has timed out. Only applies
            when servo host is not 'localhost'.

    """
    # Arguments shared by the local and the remote code path.
    call_kwargs = dict(command=command,
                       timeout=timeout,
                       ignore_status=ignore_status,
                       stdout_tee=stdout_tee,
                       stderr_tee=stderr_tee,
                       stdin=stdin,
                       verbose=verbose,
                       args=args)

    if not self.is_localhost():
        # SSH-only settings are meaningful for remote hosts alone.
        call_kwargs.update(connect_timeout=connect_timeout,
                           options=options,
                           ssh_failure_retry_ok=ssh_failure_retry_ok)
        return super(BaseServoHost, self).run(**call_kwargs)

    if self._sudo_required:
        call_kwargs['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                command)
    try:
        return utils.run(**call_kwargs)
    except error.CmdError as e:
        # Surface local failures with the same exception type that the
        # remote path raises.
        logging.error(e)
        raise error.AutoservRunError('command execution error',
                                     e.result_obj)
Garry Wang2b5eef92020-08-21 16:23:35 -0700531
532 def _mount_drive(self, src_path, dst_path):
533 """Mount an external drive on servohost.
534
535 @param: src_path the drive path to mount(e.g. /dev/sda3).
536 @param: dst_path the destination directory on servohost to mount
537 the drive.
538
539 @returns: True if mount success otherwise False.
540 """
541 # Make sure the dst dir exists.
542 self.run('mkdir -p %s' % dst_path)
543
544 result = self.run('mount -o ro %s %s' % (src_path, dst_path),
545 ignore_status=True)
546 return result.exit_status == 0
547
548 def _unmount_drive(self, mount_path):
549 """Unmount a drive from servohost.
550
551 @param: mount_path the path on servohost to unmount.
552
553 @returns: True if unmount success otherwise False.
554 """
555 result = self.run('umount %s' % mount_path, ignore_status=True)
556 return result.exit_status == 0