blob: 0385e3818156f60525cd9cc7f23a8d71f9729676 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Fang Deng0ca40e22013-08-27 17:47:44 -07002"""This class defines the Remote host class."""
mbligh321b1f52008-04-09 16:23:43 +00003
Derek Beckettf73baca2020-08-19 15:08:47 -07004from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7import os, logging, time
8import six
9from six.moves import urllib
Alex Khouderchahc44e7772018-07-16 10:53:14 -070010import re
Derek Beckettfccbb622021-02-08 16:44:53 -080011
12import common
13
mblighf2c33762008-10-18 14:42:34 +000014from autotest_lib.client.common_lib import error
Derek Beckettfccbb622021-02-08 16:44:53 -080015from autotest_lib.client.common_lib.global_config import global_config
jadmanski96b78072009-05-21 22:21:04 +000016from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -070017from autotest_lib.server.hosts import base_classes
Derek Beckettc09cc5e2021-03-18 10:40:11 -070018from autotest_lib.server.hosts.tls_client.connection import TLSConnection
mbligh321b1f52008-04-09 16:23:43 +000019
20
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default number of seconds halt() waits for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as f(host) by get_labels(); each one returns a label
    # string, a list of label strings, or a false value.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Remote path where job_start() stores a copy of /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp(1) template used by get_tmp_dir() and delete_all_tmp_dirs().
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the host identity and try to open a TLS connection.

        @param hostname Name of the remote host; stored as self.hostname.
        @param autodir Directory where autotest is installed on the host,
                if already known.
        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        self.tmp_dirs = []

        # The TLS connection is optional: a failure to create it is only
        # logged and tls_connection stays None.
        self.tls_connection = None
        try:
            self.tls_connection = TLSConnection()
        except Exception as e:
            logging.warning("Could not establish TLS connection %s", e)


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Release the resources held by this host object.

        Closes the parent class, stops the loggers, removes (best effort)
        every directory recorded in tmp_dirs and closes the TLS connection
        if one is open.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs has always been treated as possibly missing here; apply
        # the same tolerance to tls_connection so that close() does not
        # raise AttributeError on a partially initialized object.
        try:
            for tmp_dir in getattr(self, 'tmp_dirs', []):
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Best effort: a directory that cannot be removed must
                    # not prevent the rest of the teardown.
                    pass
        finally:
            # Always release the connection, even if a removal failed with
            # an unexpected exception.
            if getattr(self, 'tls_connection', None):
                self.tls_connection.close()
                self.tls_connection = None


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """
        Return the autotest install directory recorded for this host.

        @return The value last stored by _initialize() or set_autodir().
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir The autotest install directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        @param timeout How long to wait for the reboot.
        @param wait Should we wait to see if the machine comes back up.
                If this is set to True, an error from reboot_cmd is
                ignored (provided the boot id could be read beforehand).
        @param fastsync Don't wait for the sync to complete, just start one
                and move on. This is for cases where rebooting promptly
                is more important than data integrity and/or the
                machine may have disks that cause sync to never return.
        @param reboot_cmd Reboot command to execute; a default escalating
                sequence of reboot commands is used when not given.
        @param dargs Extra keyword arguments, passed to both reboot_setup()
                and wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            """Issue the reboot command and optionally wait for restart."""
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout,
                             ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to avoid,
                # because we have little control over the remote machine
                # after "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        @param timeout How long to wait for the suspend in integer seconds.
        @param suspend_cmd Suspend command to execute.
        @param allow_early_resume Boolean that indicates whether resuming
                before |timeout| is ok.

        @raises error.AutoservSuspendError If the suspend command fails,
                if the host does not answer ping once the wait is over, or
                if |allow_early_resume| is False and the device resumes
                before |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            """Issue the suspend command and poll until the host is back."""
            self.record("GOOD", None,
                        "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                # Poll more often once the expected resume time is near.
                if time_counter > timeout - 10:
                    time_slice = 5
                time.sleep(time_slice)
                time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                        "DUT is not responding after %d seconds"
                        % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if lasted < timeout and not allow_early_resume:
            raise error.AutoservSuspendError(
                    "Suspend did not last long enough: %d instead of %d" % (
                            lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent follow-up, then notify the job's profilers.

        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout How long to wait, forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        def op_func():
            """Call the parent wait_for_restart with the same arguments."""
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is recorded in tmp_dirs so that close() removes it
        together with its content.

        @param parent Directory created on the host (mkdir -p) before
                mktemp is run.
        @return Path of the newly created directory, as printed by mktemp.
        """
        # NOTE: os.path.join() discards `parent` when the template is an
        # absolute path, which is the case for the TMP_DIR_TEMPLATE defined
        # on this class.
        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
        result = self.run('mkdir -p %s && mktemp -d %s' % (parent, template))
        dir_name = result.stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read the keyvals stored for this host in the job results directory.

        @return The keyvals read from <resultdir>/host_keyvals/<hostname>,
                or an empty dict when the host has no job.
        """
        if not self.job:
            return {}
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        return self._read_host_keyvals().get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        The 'labels' keyval is a comma separated list of URL-quoted labels;
        the returned labels are unquoted.
        """
        all_labels = self._read_host_keyvals().get('labels', '')
        if not all_labels:
            return []
        return [urllib.parse.unquote(label)
                for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The call is idempotent: a directory that is not (or no longer)
        recorded in tmp_dirs is still removed on the host and does not
        raise.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        try:
            self.tmp_dirs.remove(tmpdir)
        except ValueError:
            logging.debug('%s was not a tracked temporary directory', tmpdir)


    def delete_all_tmp_dirs(self, parent='/tmp'):
        """
        Delete all directories in parent that were created by get_tmp_dir

        Note that this may involve deleting directories created by calls to
        get_tmp_dir on a different RemoteHost instance than the one running
        this method. Only perform this operation when certain that this will
        not cause unexpected behavior.

        @param parent A parent directory, or a list/tuple of them.
        """
        if isinstance(parent, (list, tuple)):
            parents = parent
        else:
            parents = [parent]

        # The pieces below only depend on the template, so they are computed
        # once instead of once per parent directory.
        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
        base_template = re.sub(r'XXXX*', '*', self.TMP_DIR_TEMPLATE)
        # distinguish between non-wildcard asterisks in parent directory name
        # and wildcards inserted from the template
        base = '*'.join(
                ['"%s"' % utils.sh_escape(x)
                 for x in base_template.split('*')])
        name_regex = re.sub(
                r'(XXXX*)',
                lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
                self.TMP_DIR_TEMPLATE)

        rm_paths = []
        for parent_dir in parents:
            # base starts with a double quote; it is re-added in front of
            # the joined path so that the parent directory is quoted too.
            rm_paths.append(
                    '"%s' % os.path.join(utils.sh_escape(parent_dir),
                                         base[1:]))
            # remove deleted directories from tmp_dirs
            regex = os.path.join(parent_dir, name_regex)
            # remove if matches, or is within a dir that matches
            regex += '(/|$)'
            self.tmp_dirs = [x for x in self.tmp_dirs
                             if not re.match(regex, x)]

        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @return The first whitespace separated field of /proc/uptime, as a
                string.
        @raises error.AutoservHostError If is_up() reports the host down.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        # NOTE(review): 30 is passed positionally; presumably the run()
        # timeout in seconds -- confirm against the run() signature.
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. return True if exist.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running or if no
        waitup processes are configured, False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function that raises is logged and ignored. String results
        are appended, list results are extended into the returned list and
        any other result is dropped.

        @return list of labels
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if not label:
                continue
            # isinstance (rather than an exact type comparison) also accepts
            # unicode labels on Python 2 and str/list subclasses.
            if isinstance(label, six.string_types):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels


    def get_result_dir(self):
        """Return the result directory path if passed or None if not.

        @return string
        """
        if self.job and hasattr(self.job, 'resultdir'):
            return self.job.resultdir
        return None