blob: d49d145485a99a002aea8b20ec3b9cae9aad504c [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Fang Deng0ca40e22013-08-27 17:47:44 -07002"""This class defines the Remote host class."""
mbligh321b1f52008-04-09 16:23:43 +00003
Derek Beckettf73baca2020-08-19 15:08:47 -07004from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7import os, logging, time
8import six
9from six.moves import urllib
Alex Khouderchahc44e7772018-07-16 10:53:14 -070010import re
Derek Beckettfccbb622021-02-08 16:44:53 -080011
12import common
13
mblighf2c33762008-10-18 14:42:34 +000014from autotest_lib.client.common_lib import error
Derek Beckettfccbb622021-02-08 16:44:53 -080015from autotest_lib.client.common_lib.global_config import global_config
jadmanski96b78072009-05-21 22:21:04 +000016from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -070017from autotest_lib.server.hosts import base_classes
Derek Beckettfccbb622021-02-08 16:44:53 -080018from autotest_lib.server.hosts.tls_client.connection import TLSCConnection
mbligh321b1f52008-04-09 16:23:43 +000019
20
jadmanski1c5e3a12008-08-15 23:08:20 +000021class RemoteHost(base_classes.Host):
jadmanskid60321a2008-10-28 20:32:05 +000022 """
23 This class represents a remote machine on which you can run
jadmanski0afbb632008-06-06 21:10:57 +000024 programs.
mbligh321b1f52008-04-09 16:23:43 +000025
jadmanski0afbb632008-06-06 21:10:57 +000026 It may be accessed through a network, a serial line, ...
27 It is not the machine autoserv is running on.
mbligh321b1f52008-04-09 16:23:43 +000028
jadmanski0afbb632008-06-06 21:10:57 +000029 Implementation details:
30 This is an abstract class, leaf subclasses must implement the methods
31 listed here and in parent classes which have no implementation. They
32 may reimplement methods which already have an implementation. You
33 must not instantiate this class but should instantiate one of those
jadmanskid60321a2008-10-28 20:32:05 +000034 leaf subclasses.
35 """
mbligh321b1f52008-04-09 16:23:43 +000036
mblighf2c33762008-10-18 14:42:34 +000037 DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
Eric Li6f27d4f2010-09-29 10:55:17 -070038 DEFAULT_HALT_TIMEOUT = 2 * 60
Kevin Cheng3a4a57a2015-09-30 12:09:50 -070039 _LABEL_FUNCTIONS = []
40 _DETECTABLE_LABELS = []
mblighf2c33762008-10-18 14:42:34 +000041
Eric Li861b2d52011-02-04 14:50:35 -080042 VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
Mike Frysingerb718b032019-07-01 07:55:44 -040043 TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'
jadmanski4900b3b2009-07-02 22:12:08 +000044
Kevin Cheng3a4a57a2015-09-30 12:09:50 -070045
def _initialize(self, hostname, autodir=None, *args, **dargs):
    """
    Set up the per-host state used by the rest of this class.

    @param hostname Name of the remote machine; stored on the instance.
    @param autodir Autotest directory on the host, if already known.
    @param args Positional arguments forwarded to the parent _initialize.
    @param dargs Keyword arguments forwarded to the parent _initialize.
    """
    super(RemoteHost, self)._initialize(*args, **dargs)

    self.hostname = hostname
    self.autodir = autodir
    # Directories handed out by get_tmp_dir(); close() removes them.
    self.tmp_dirs = []

    # Start without a TLS connection so the attribute always exists, and
    # only open one when the AUTOSERV/enable_tls option is turned on.
    self.tls_connection = None
    tls_enabled = global_config.get_config_value(
            'AUTOSERV', 'enable_tls', type=bool, default=False)
    if tls_enabled:
        self.tls_connection = TLSCConnection()
jadmanskia2db9412008-08-22 21:47:24 +000058
def __repr__(self):
    """Return a short description that names the remote host."""
    description = "<remote host: %s>" % self.hostname
    return description
61
62
def close(self):
    """
    Release the resources held for this remote host.

    Runs the parent close(), stops the loggers, removes every directory
    recorded in self.tmp_dirs from the remote machine (best effort) and
    finally closes the TLS connection if one was opened.
    """
    try:
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is missing when _initialize() never ran to completion.
        for tmp_dir in getattr(self, 'tmp_dirs', []):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError as e:
                # Best effort only: a directory that cannot be removed must
                # not keep the host from closing, but leave a trace of it.
                logging.debug('Failed to remove temporary directory %s: %s',
                              tmp_dir, e)
    finally:
        # Always release the TLS connection, even when a step above raised.
        # Like tmp_dirs, the attribute may not exist on a partially
        # initialized host.
        tls_connection = getattr(self, 'tls_connection', None)
        if tls_connection:
            tls_connection.close()
            self.tls_connection = None
mblighf2c33762008-10-18 14:42:34 +000077
def job_start(self):
    """
    Hook called the first time a remote host object is created for a
    specific host after a job starts.

    It keeps a copy of /var/log/messages (when that file exists) at
    VAR_LOG_MESSAGES_COPY_PATH on the host. Any failure is logged and
    otherwise ignored.

    This method depends on the create_host factory being used to
    construct your host object. If you construct host objects directly
    you need to call this method yourself (and enforce the single-call
    rule).
    """
    copy_cmd_template = ('test ! -e /var/log/messages || '
                         'cp -f /var/log/messages %s')
    try:
        self.run(copy_cmd_template % self.VAR_LOG_MESSAGES_COPY_PATH)
    except Exception as e:
        # Not being able to take the snapshot must never fail the job.
        logging.info('Failed to copy /var/log/messages at startup: %s', e)
jadmanskid60321a2008-10-28 20:32:05 +000095
96
def get_autodir(self):
    """Return the autotest directory currently recorded for this host."""
    autodir = self.autodir
    return autodir
99
100
def set_autodir(self, autodir):
    """
    Tell the host object where autotest is installed.

    Called in server/autotest.py after a successful install.

    @param autodir The autotest directory to record on this host object.
    """
    self.autodir = autodir
108
109
def sysrq_reboot(self):
    """Write 'b' to /proc/sysrq-trigger on the host, in the background."""
    sysrq_cmd = 'echo b > /proc/sysrq-trigger'
    self.run_background(sysrq_cmd)
mblighf2c33762008-10-18 14:42:34 +0000113
114
def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
    """
    Shut down the remote host.

    N.B. Nothing here brings the target back up. The target stays
    offline indefinitely unless independent hardware (servo, RPM, etc.)
    forces it to power on.

    @param timeout Maximum time to wait for host down, in seconds.
    @param wait Whether to wait for the host to go offline.
    """
    self.run_background('sleep 1 ; halt')
    if not wait:
        return
    self.wait_down(timeout=timeout)
130
131
def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
           fastsync=False, reboot_cmd=None, **dargs):
    """
    Reboot the remote host, optionally waiting for it to come back.

    @param timeout How long to wait for the reboot. Also used as the
            timeout of the sync that is run before the reboot command.
    @param wait Whether to wait for the machine to come back up. When
            True, an error from the reboot command is ignored (provided
            the boot id could be read beforehand) and wait_for_restart()
            decides the outcome instead.
    @param fastsync When True, skip the blocking 'sync; sync' that
            otherwise runs before the reboot command. This is for cases
            where rebooting promptly is more important than data
            integrity and/or the machine may have disks that cause sync
            to never return.
    @param reboot_cmd Reboot command to execute. When empty, a default
            sequence of increasingly forceful reboot commands is used.
    @param dargs Extra keyword arguments handed to reboot_setup() and,
            when waiting, to wait_for_restart().
    """
    self.reboot_setup(**dargs)
    if not reboot_cmd:
        reboot_cmd = '; '.join(['sync & sleep 5',
                                'reboot & sleep 60',
                                'reboot -f & sleep 10',
                                'reboot -nf & sleep 10',
                                'telinit 6'])

    def reboot():
        self.record("GOOD", None, "reboot.start")
        current_boot_id = None
        try:
            current_boot_id = self.get_boot_id()

            # Sync before starting the reboot, so that a long sync during
            # shutdown isn't timed out by wait_down's short timeout.
            if not fastsync:
                self.run('sync; sync', timeout=timeout, ignore_status=True)

            self.run_background(reboot_cmd)
        except error.AutoservRunError:
            # reboot_cmd sometimes reports an error even though the reboot
            # is successfully in progress; there is little control over
            # the remote machine once "reboot" starts. So when we are
            # going to wait anyway and know the old boot id, rely on
            # wait_for_restart() for the verdict instead of this error.
            can_rely_on_wait = wait and current_boot_id is not None
            if not can_rely_on_wait:
                # TODO(b/37652392): Revisit no-wait case, later.
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
        if wait:
            self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                  **dargs)

    # A full reboot-and-wait is run inside a group; otherwise run directly.
    if wait:
        self.log_op(self.OP_REBOOT, reboot)
    else:
        reboot()
190
def suspend(self, timeout, suspend_cmd,
            allow_early_resume=False):
    """
    Suspend the remote host and wait until it answers pings again.

    @param timeout How long to wait for the suspend in integer seconds.
    @param suspend_cmd Suspend command to execute.
    @param allow_early_resume Boolean that indicates whether a resume
            before |timeout| is ok.

    @raises error.AutoservSuspendError If the suspend command fails, if
            the host does not answer pings once polling ends, or if
            |allow_early_resume| is False and the device resumes before
            |timeout|.
    """
    # The suspend is defined as a function so it can be run in a group.
    def suspend():
        self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
        try:
            self.run_background(suspend_cmd)
        except error.AutoservRunError:
            self.record("ABORT", None, "suspend.start",
                        "suspend command failed")
            raise error.AutoservSuspendError("suspend command failed")

        # Wait for some time, to ensure the machine is going to sleep,
        # but not so long that we cannot check it really suspended.
        poll_interval = min(timeout / 2, 300)
        time.sleep(poll_interval)
        elapsed = poll_interval
        while elapsed < timeout + 60:
            # Check regularly whether the machine is back, to ensure it
            # was suspended long enough.
            if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                return
            # Poll every 5 seconds once we are close to the timeout.
            if elapsed > timeout - 10:
                poll_interval = 5
            time.sleep(poll_interval)
            elapsed += poll_interval

        if utils.ping(self.hostname, tries=1, deadline=1) != 0:
            raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (elapsed))

    start_time = time.time()
    self.log_op(self.OP_SUSPEND, suspend)
    lasted = time.time() - start_time
    logging.info("Device resumed after %d secs", lasted)
    resumed_early = lasted < timeout
    if resumed_early and not allow_early_resume:
        raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                        lasted, timeout))
mblighf2c33762008-10-18 14:42:34 +0000245
def reboot_followup(self, *args, **dargs):
    """
    Run the parent reboot follow-up, then notify the job's profilers.

    @param args Positional arguments forwarded to the parent method.
    @param dargs Keyword arguments forwarded to the parent method.
    """
    super(RemoteHost, self).reboot_followup(*args, **dargs)
    if not self.job:
        return
    self.job.profilers.handle_reboot(self)
251
252
def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
    """
    Wait for the host to come back from a reboot.

    The generic wait_for_restart implementation of the parent class is
    wrapped in a reboot group via log_op().

    @param timeout Timeout handed to the parent wait_for_restart.
    @param dargs Extra keyword arguments for the parent wait_for_restart.
    """
    parent_wait = super(RemoteHost, self).wait_for_restart

    def op_func():
        parent_wait(timeout=timeout, **dargs)

    self.log_op(self.OP_REBOOT, op_func)
mblighf2c33762008-10-18 14:42:34 +0000262
263
def cleanup(self):
    """Run the parent cleanup and then reboot the host."""
    super(RemoteHost, self).cleanup()
    self.reboot()
mbligh1264b512008-11-05 22:21:49 +0000268
269
def get_tmp_dir(self, parent='/tmp'):
    """
    Create a directory on the host suitable for temporary file storage
    and return its pathname.

    The directory is recorded in self.tmp_dirs so that close() or
    delete_tmp_dir() can remove it, together with its content, later.

    NOTE(review): TMP_DIR_TEMPLATE as defined in this class is an
    absolute path, so the os.path.join() below discards |parent| and the
    directory is created from the template alone; |parent| is still
    created with mkdir -p. Confirm this is intended before relying on
    |parent| to choose the location.

    @param parent Directory that is created (if missing) before mktemp
            is called.
    @return The pathname printed by mktemp, without trailing whitespace.
    """
    template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
    # Quote both paths the same way delete_tmp_dir() does, so that spaces
    # or shell metacharacters in them cannot split or alter the command.
    cmd = 'mkdir -p "%s" && mktemp -d "%s"' % (utils.sh_escape(parent),
                                              utils.sh_escape(template))
    dir_name = self.run(cmd).stdout.rstrip()
    self.tmp_dirs.append(dir_name)
    return dir_name
283
284
def get_platform_label(self):
    """
    Look up the platform label in this host's keyval file.

    @return The 'platform' keyval read from
            <job.resultdir>/host_keyvals/<hostname>, or None when there
            is no job or the keyval is not set.
    """
    if not self.job:
        return None
    keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                               self.hostname)
    return utils.read_keyval(keyval_path).get('platform', None)
297
298
def get_all_labels(self):
    """
    Read every label recorded in this host's keyval file.

    @return The comma-separated 'labels' keyval from
            <job.resultdir>/host_keyvals/<hostname>, split and
            URL-unquoted; an empty list when there is no job or no
            labels are set.
    """
    if not self.job:
        return []
    keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                               self.hostname)
    raw_labels = utils.read_keyval(keyval_path).get('labels', '')
    if not raw_labels:
        return []
    return [urllib.parse.unquote(label) for label in raw_labels.split(',')]
312
313
def delete_tmp_dir(self, tmpdir):
    """
    Delete the given temporary directory on the remote machine.

    The removal on the host ignores the command's exit status. The
    directory is also dropped from self.tmp_dirs when it is tracked
    there.

    @param tmpdir The directory to delete.
    """
    self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
    # The directory may no longer be tracked (e.g. delete_all_tmp_dirs()
    # already pruned it, or this is a repeated call). list.remove() would
    # raise ValueError then, so only remove entries that are present.
    if tmpdir in self.tmp_dirs:
        self.tmp_dirs.remove(tmpdir)
322
323
def delete_all_tmp_dirs(self, parent='/tmp'):
    """
    Delete all directories in parent that were created by get_tmp_dir.

    Note that this may involve deleting directories created by calls to
    get_tmp_dir on a different RemoteHost instance than the one running
    this method. Only perform this operation when certain that this will
    not cause unexpected behavior.

    @param parent A parent directory, or a list/tuple of them.
    """
    def _as_char_class(match):
        # One alphanumeric character per X that mktemp would replace.
        return '[a-zA-Z0-9]{%d}' % len(match.group(1))

    parents = parent if isinstance(parent, (list, tuple)) else [parent]
    rm_paths = []
    for parent_dir in parents:
        # Follow mktemp's behavior of only expanding 3 or more
        # consecutive Xs.
        glob_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
        # Quote every literal piece but leave the inserted asterisks
        # outside the quotes, to distinguish them from non-wildcard
        # asterisks in the parent directory name.
        quoted_pieces = ['"%s"' % utils.sh_escape(piece)
                         for piece in glob_template.split('*')]
        quoted_glob = '*'.join(quoted_pieces)
        rm_paths.append('"%s' % os.path.join(utils.sh_escape(parent_dir),
                                             quoted_glob[1:]))

        # Stop tracking every directory that is about to be deleted: those
        # matching the template, or located inside a directory that does.
        pattern = os.path.join(
                parent_dir,
                re.sub('(XXXX*)', _as_char_class, self.TMP_DIR_TEMPLATE))
        pattern += '(/|$)'
        still_tracked = []
        for tracked_dir in self.tmp_dirs:
            if not re.match(pattern, tracked_dir):
                still_tracked.append(tracked_dir)
        self.tmp_dirs = still_tracked

    self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)
Alex Khouderchahc44e7772018-07-16 10:53:14 -0700355
def check_uptime(self):
    """
    Read the host's uptime from /proc/uptime.

    @return The first whitespace-separated field of /proc/uptime, as a
            string.
    @raises error.AutoservHostError If the host does not appear to be up.
    """
    if not self.is_up():
        raise error.AutoservHostError('Client does not appear to be up')
    uptime_output = self.run("/bin/cat /proc/uptime", 30).stdout
    uptime_fields = uptime_output.strip().split()
    return uptime_fields[0]
364
365
def check_for_lkdtm(self):
    """
    Check for the kernel dump test module.

    @return True if listing /sys/kernel/debug/provoke-crash/DIRECT on
            the host succeeds, False otherwise.
    """
    probe = self.run('ls /sys/kernel/debug/provoke-crash/DIRECT',
                     ignore_status=True)
    return probe.exit_status == 0
372
373
def are_wait_up_processes_up(self):
    """
    Check whether any of the host's wait-up processes is running yet on
    the remote host.

    @return True if no wait-up processes are configured or if at least
            one of them shows up in the host's process list, False
            otherwise.
    """
    processes = self.get_wait_up_processes()
    if len(processes) == 0:
        # Wait-up processes aren't being used.
        return True
    # Stops at the first process that grep finds in the ps output.
    return any(
            self.run("{ ps -e || ps; } | grep '%s'" % procname,
                     ignore_status=True).exit_status == 0
            for procname in processes)
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700391
392
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it runs through all the label functions in _LABEL_FUNCTIONS. A
    label function that raises is logged and skipped.

    @return A flat list built from each function's result: a string is
            added as a single label, a list or tuple contributes all of
            its items, and any other (or empty) result is ignored.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception:
            # Not every callable has a __name__ (e.g. functools.partial);
            # fall back to its repr so that logging the failure cannot
            # itself raise and abort label detection.
            logging.exception('Label function %s failed; ignoring it.',
                              getattr(label_function, '__name__',
                                      repr(label_function)))
            continue
        if not label:
            continue
        # isinstance() instead of exact type checks, so that unicode
        # strings on Python 2, subclasses and tuples are not dropped.
        if isinstance(label, six.string_types):
            labels.append(label)
        elif isinstance(label, (list, tuple)):
            labels.extend(label)
    return labels
Otabek Kasimov480e7fa2020-11-23 18:52:23 -0800413
def get_result_dir(self):
    """Return the job's result directory, if there is one.

    @return The job's resultdir attribute, or None when the host has no
            job or the job has no resultdir attribute.
    """
    job = self.job
    if not job or not hasattr(job, 'resultdir'):
        return None
    return job.resultdir