blob: febbde24ce4001ec0154c89c7f0e4011478e2f02 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
"""This module defines the RemoteHost class."""
mbligh321b1f52008-04-09 16:23:43 +00003
Derek Beckettf73baca2020-08-19 15:08:47 -07004from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7import os, logging, time
8import six
9from six.moves import urllib
Alex Khouderchahc44e7772018-07-16 10:53:14 -070010import re
Derek Beckettfccbb622021-02-08 16:44:53 -080011
12import common
13
mblighf2c33762008-10-18 14:42:34 +000014from autotest_lib.client.common_lib import error
Derek Beckettfccbb622021-02-08 16:44:53 -080015from autotest_lib.client.common_lib.global_config import global_config
jadmanski96b78072009-05-21 22:21:04 +000016from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -070017from autotest_lib.server.hosts import base_classes
Derek Beckettc09cc5e2021-03-18 10:40:11 -070018from autotest_lib.server.hosts.tls_client.connection import TLSConnection
mbligh321b1f52008-04-09 16:23:43 +000019
20
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default number of seconds halt() waits for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables run by get_labels(); each is called with the host object
    # and may return a label string, a list of labels, or a false value.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp(1) template used by get_tmp_dir() and delete_all_tmp_dirs().
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the host identity and set up per-host bookkeeping.

        @param hostname Name of the remote host; stored as self.hostname.
        @param autodir Autotest install directory on the host, or None;
                stored as self.autodir.
        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        self.tmp_dirs = []

        # A TLS connection is optional: a failure to create one is logged
        # and leaves self.tls_connection set to None.
        self.tls_connection = None
        try:
            self.tls_connection = TLSConnection()
        except Exception as e:
            logging.warning("Could not establish TLS connection %s", e)


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Release the resources held by this host object.

        Calls the parent close(), stops the loggers, removes every
        directory listed in self.tmp_dirs from the remote machine (best
        effort) and closes the TLS connection if one was established.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # Both attributes are looked up defensively: they are only set by
        # _initialize(), which may not have run to completion.
        for tmp_dir in getattr(self, 'tmp_dirs', []):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError as e:
                # Best-effort cleanup; a failed removal must not make
                # close() fail.
                logging.debug('Failed to remove temporary directory %s: %s',
                              tmp_dir, e)

        tls_connection = getattr(self, 'tls_connection', None)
        if tls_connection:
            tls_connection.close()
            self.tls_connection = None


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).

        This implementation copies /var/log/messages, when it exists, to
        VAR_LOG_MESSAGES_COPY_PATH on the remote host. Any failure is
        logged and otherwise ignored.
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of where
        autotest is installed. Called in server/autotest.py after a
        successful install.

        @param autodir The autotest install directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B. This method makes no provision to bring the target back
        up. The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout Maximum time to wait for host down, in seconds.
        @param wait Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                       If this is set to True, ignores reboot_cmd's error
                       even if occurs.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
                dargs - Extra keyword arguments, passed to reboot_setup() and,
                        when waiting, to wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            # Escalate through progressively more forceful reboot commands.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            """Issue the reboot command and optionally wait for restart."""
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to be avoided,
                # because we have no much control on remote machine after
                # "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend in integer seconds.
                suspend_cmd - suspend command to execute.
                allow_early_resume - Boolean that indicate whether resume
                                     before |timeout| is ok.
        Raises:
                error.AutoservSuspendError - If the suspend command fails,
                                     if the device does not answer a ping
                                     once the wait is over, or if
                                     |allow_early_resume| is False and the
                                     device resumes before |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            """Run suspend_cmd, then poll until the host answers a ping."""
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once the expected resume time is near.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot follow-up, then notify the job's profilers.

        All arguments are forwarded to the parent implementation. The
        profilers are only notified when this host has a job.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout Timeout passed to the parent wait_for_restart().
        @param dargs Extra keyword arguments passed to the parent
                wait_for_restart().
        """
        def op_func():
            """Delegate to the parent class implementation."""
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is created on the remote host with mktemp and is
        recorded in self.tmp_dirs, whose entries are removed by close().

        @param parent Directory joined in front of TMP_DIR_TEMPLATE. Note
                that os.path.join() discards it when the template is an
                absolute path, which the default template is.
        """
        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
        # Create the directory that will actually hold the temp dir, which
        # differs from |parent| when the template is absolute.
        parent = os.path.dirname(template)
        cmd = 'mkdir -p %s && mktemp -d %s' % (parent, template)
        dir_name = self.run(cmd).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.

        The label is read from the 'platform' entry of this host's keyval
        file under the job's result directory; None is returned when the
        host has no job.
        """
        if not self.job:
            return None
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        keyvals = utils.read_keyval(keyval_path)
        return keyvals.get('platform')


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are read from the comma-separated 'labels' entry of this
        host's keyval file and URL-unquoted individually.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            all_labels = keyvals.get('labels', '')
            if all_labels:
                all_labels = all_labels.split(',')
                return [urllib.parse.unquote(label) for label in all_labels]
        return []


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # The directory may not be tracked by this instance (created by
        # another host object, or already forgotten); list.remove() would
        # raise ValueError in that case even though the remote removal
        # above has already been attempted.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def delete_all_tmp_dirs(self, parent='/tmp'):
        """
        Delete all directories in parent that were created by get_tmp_dir

        Note that this may involve deleting directories created by calls to
        get_tmp_dir on a different RemoteHost instance than the one running this
        method. Only perform this operation when certain that this will not
        cause unexpected behavior.

        @param parent A parent directory, or a list/tuple of them.
        """
        if isinstance(parent, (list, tuple)):
            parents = parent
        else:
            parents = [parent]

        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
        base_template = re.sub(r'XXXX*', '*', self.TMP_DIR_TEMPLATE)
        # distinguish between non-wildcard asterisks in parent directory name
        # and wildcards inserted from the template: every literal piece is
        # quoted and only the inserted wildcards are left outside the quotes
        base = '*'.join(
            ['"%s"' % utils.sh_escape(x) for x in base_template.split('*')])
        # Regex form of the template: each run of Xs becomes the same number
        # of alphanumeric characters.
        regex_template = re.sub(
            r'(XXXX*)',
            lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
            self.TMP_DIR_TEMPLATE)

        rm_paths = []
        for parent_dir in parents:
            path = '"%s' % os.path.join(utils.sh_escape(parent_dir), base[1:])
            rm_paths.append(path)
            # remove deleted directories from tmp_dirs; the parent is
            # escaped so it is matched literally, not as a regex
            regex = os.path.join(re.escape(parent_dir), regex_template)
            regex += '(/|$)' # remove if matches, or is within a dir that matches
            self.tmp_dirs = [x for x in self.tmp_dirs if not re.match(regex, x)]

        # Nothing to delete for an empty list of parents; avoid running a
        # bare "rm -rf".
        if not rm_paths:
            return
        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @return The first field of /proc/uptime, as a string.
        @raises error.AutoservHostError if the host does not appear up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if
        no waitup processes are configured; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function that raises is logged and skipped. A string result
        is appended, a list result is merged in, and any other result is
        ignored.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if not label:
                continue
            # isinstance() with six.string_types also accepts unicode labels
            # under Python 2, which an exact type() comparison would drop.
            if isinstance(label, six.string_types):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels


    def get_result_dir(self):
        """Return the result directory path if passed or None if not.

        @return string
        """
        if self.job and hasattr(self.job, 'resultdir'):
            return self.job.resultdir
        return None