blob: 6df4192c5c5b3f63b4337b172f56184beae187d2 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Fang Deng0ca40e22013-08-27 17:47:44 -07002"""This class defines the Remote host class."""
mbligh321b1f52008-04-09 16:23:43 +00003
Derek Beckettf73baca2020-08-19 15:08:47 -07004from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
Derek Becketta20b12a2021-08-27 15:04:44 -07007
Derek Beckettf73baca2020-08-19 15:08:47 -07008import os, logging, time
Alex Khouderchahc44e7772018-07-16 10:53:14 -07009import re
Derek Becketta20b12a2021-08-27 15:04:44 -070010from six.moves import urllib
Derek Beckettfccbb622021-02-08 16:44:53 -080011
12import common
13
mblighf2c33762008-10-18 14:42:34 +000014from autotest_lib.client.common_lib import error
Derek Beckettfccbb622021-02-08 16:44:53 -080015from autotest_lib.client.common_lib.global_config import global_config
jadmanski96b78072009-05-21 22:21:04 +000016from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -070017from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +000018
19
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    DEFAULT_HALT_TIMEOUT = 2 * 60
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the per-host state of this object.

        @param hostname Name of the remote host.
        @param autodir Directory where autotest is installed on the host,
                       if already known (see set_autodir()).
        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        self.tmp_dirs = []

        # No TLS connection is opened here. close() releases whatever
        # connection has been attached to this attribute in the meantime.
        self.tls_connection = None


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Release the resources held by this host object.

        Closes the parent class, stops the loggers, removes every
        temporary directory recorded in tmp_dirs from the remote host and
        closes the TLS connection, if one is attached.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # getattr() keeps close() usable on an object whose _initialize()
        # did not run to completion.
        for tmp_dir in getattr(self, 'tmp_dirs', []):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError as e:
                # Best effort only: a failed cleanup must not abort close().
                logging.debug('Failed to remove temporary directory %s: %s',
                              tmp_dir, e)

        tls_connection = getattr(self, 'tls_connection', None)
        if tls_connection:
            tls_connection.close()
            self.tls_connection = None


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest install directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir The autotest install directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Start a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B. This method makes no provision to bring the target back
        up. The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout Maximum time to wait for host down, in seconds.
        @param wait Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                       If this is set to True, ignores reboot_cmd's error
                       even if occurs.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
                dargs - Extra keyword arguments, passed to reboot_setup()
                        and, when waiting, to wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            """Issue the reboot command and optionally wait for restart."""
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to be avoided,
                # because we have no much control on remote machine after
                # "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend in integer seconds.
                suspend_cmd - suspend command to execute.
                allow_early_resume - Boolean that indicate whether resume
                                     before |timeout| is ok.
        Raises:
                error.AutoservSuspendError - If the suspend command fails,
                                             if the device does not answer
                                             pings after the wait, or if
                                             |allow_early_resume| is False
                                             and the device resumes before
                                             |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            """Issue the suspend command and poll until the host answers."""
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once the expected resume time is near.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                        "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                    "Suspend did not last long enough: %d instead of %d" % (
                            lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent class follow-up, then notify the job's profilers
        of the reboot when a job is attached to this host.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout Timeout passed to the generic implementation.
        @param dargs Extra keyword arguments passed to the generic
                     implementation.
        """
        def op_func():
            """Call the parent class wait_for_restart()."""
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent class cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.

        @param parent Directory joined in front of TMP_DIR_TEMPLATE.
                      os.path.join() drops everything that precedes an
                      absolute component, so this argument has no effect
                      while TMP_DIR_TEMPLATE is an absolute path, as the
                      value defined on this class is.
        @return The path printed by mktemp on the remote host.
        """
        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
        # mktemp needs the containing directory to exist already.
        parent = os.path.dirname(template)
        dir_name = self.run('mkdir -p %s && mktemp -d %s' % (parent, template)).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        if not self.job:
            return None
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        keyvals = utils.read_keyval(keyval_path)
        return keyvals.get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are read from the comma separated 'labels' keyval of this
        host and URL-unquoted.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            all_labels = keyvals.get('labels', '')
            if all_labels:
                all_labels = all_labels.split(',')
                return [urllib.parse.unquote(label) for label in all_labels]
        return []


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # The directory may not have been created through this instance;
        # only forget it when it is actually tracked.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def delete_all_tmp_dirs(self, parent='/tmp'):
        """
        Delete all directories in parent that were created by get_tmp_dir

        Note that this may involve deleting directories created by calls to
        get_tmp_dir on a different RemoteHost instance than the one running this
        method. Only perform this operation when certain that this will not
        cause unexpected behavior.

        @param parent A directory, or a list/tuple of directories, joined
                      in front of TMP_DIR_TEMPLATE. os.path.join() drops
                      everything that precedes an absolute component, so
                      the value has no effect while TMP_DIR_TEMPLATE is an
                      absolute path, as the value defined on this class is.
        """
        if isinstance(parent, (list, tuple)):
            parents = parent
        else:
            parents = [parent]

        # The pieces below depend only on TMP_DIR_TEMPLATE, so build them
        # once instead of once per parent directory.

        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
        base_template = re.sub(r'XXXX*', '*', self.TMP_DIR_TEMPLATE)
        # distinguish between non-wildcard asterisks in parent directory name
        # and wildcards inserted from the template
        base = '*'.join(
                ['"%s"' % utils.sh_escape(x) for x in base_template.split('*')])
        name_regex = re.sub(
                r'(XXXX*)',
                lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
                self.TMP_DIR_TEMPLATE)

        rm_paths = []
        for parent_dir in parents:
            # base starts with a double quote; drop it so the quote can be
            # re-added in front of the joined path.
            path = '"%s' % os.path.join(utils.sh_escape(parent_dir), base[1:])
            rm_paths.append(path)
            # remove deleted directories from tmp_dirs
            regex = os.path.join(parent_dir, name_regex)
            regex += r'(/|$)'  # remove if matches, or is within a dir that matches
            self.tmp_dirs = [x for x in self.tmp_dirs if not re.match(regex, x)]

        # Nothing to delete for an empty list of parents.
        if rm_paths:
            self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @return The first field of /proc/uptime, as a string.
        @raises error.AutoservHostError if the host does not appear to
                be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. return True if exist.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any the waitup processes are running, or if no
        waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of strings;
        a function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if not label:
                continue
            if isinstance(label, str):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels


    def get_result_dir(self):
        """Return the result directory path if passed or None if not.

        @return string
        """
        if self.job and hasattr(self.job, 'resultdir'):
            return self.job.resultdir
        return None