blob: afe8e31997d97327b680af26ac2cc0edacd7e4c5 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Fang Deng0ca40e22013-08-27 17:47:44 -07002"""This class defines the Remote host class."""
mbligh321b1f52008-04-09 16:23:43 +00003
Derek Beckettf73baca2020-08-19 15:08:47 -07004from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7import os, logging, time
8import six
9from six.moves import urllib
Alex Khouderchahc44e7772018-07-16 10:53:14 -070010import re
mblighf2c33762008-10-18 14:42:34 +000011from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +000012from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -070013from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +000014
15
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time allowed for halt() to take the host down, in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as f(host) by get_labels().
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp template used by get_tmp_dir() and delete_all_tmp_dirs().
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the host name and autotest directory for this host.

        @param hostname Name of the remote host.
        @param autodir Autotest install directory on the host, if known.
        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote temporary directories handed out by get_tmp_dir().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host, stop its loggers and remove the temporary
        directories that are still tracked in self.tmp_dirs.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(); tolerate objects on
        # which it never ran.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError as e:
                    # Cleanup is best-effort; leave a trace and move on.
                    logging.debug('Failed to remove temporary directory '
                                  '%s: %s', tmp_dir, e)


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir The autotest install directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        @param timeout How long to wait for the reboot.
        @param wait Should we wait to see if the machine comes back up.
                If this is set to True, an error from reboot_cmd is
                ignored as long as the boot id could be read beforehand.
        @param fastsync Don't wait for the sync to complete, just start
                one and move on. This is for cases where rebooting
                promptly is more important than data integrity and/or
                the machine may have disks that cause sync to never
                return.
        @param reboot_cmd Reboot command to execute. A built-in sequence
                of increasingly forceful reboot commands is used when
                this is empty.
        @param dargs Extra keyword arguments, passed to reboot_setup()
                and to wait_for_restart().

        @raises error.AutoservRunError if the reboot command fails and
                either wait is False or the boot id was not obtained.
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to avoid,
                # because we do not have much control over the remote
                # machine after "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        @param timeout How long to wait for the suspend in integer seconds.
        @param suspend_cmd Suspend command to execute.
        @param allow_early_resume Boolean that indicates whether resume
                before |timeout| is ok.

        @raises error.AutoservSuspendError if the suspend command fails,
                if the host does not answer a ping once the wait is
                over, or if |allow_early_resume| is False and the device
                resumes before |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll every 5 seconds once the expected resume
                    # time is close.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent class follow-up, then let the job's profilers
        handle the reboot when a job is attached to this host.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout Timeout passed to the generic implementation.
        @param dargs Extra keyword arguments passed to the generic
                implementation.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent class cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.

        @param parent Directory created on the host (mkdir -p) before
                the temporary directory is made.

        @returns The path printed by mktemp on the remote host.
        """
        # NOTE: os.path.join() discards `parent` when the template is an
        # absolute path, which the default TMP_DIR_TEMPLATE is; the
        # directory is then created under the template's own location.
        template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
        dir_name = self.run('mkdir -p %s && mktemp -d %s' % (parent, template)).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.

        The label is read from the 'platform' entry of this host's
        keyval file under the job's result directory.
        """
        if not self.job:
            return None

        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        keyvals = utils.read_keyval(keyval_path)
        return keyvals.get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are read from the comma-separated 'labels' entry of this
        host's keyval file and URL-unquoted individually.
        """
        if not self.job:
            return []

        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        keyvals = utils.read_keyval(keyval_path)
        all_labels = keyvals.get('labels', '')
        if not all_labels:
            return []
        return [urllib.parse.unquote(label)
                for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        A directory that is not tracked in self.tmp_dirs is still
        removed on the host; it is simply not un-tracked.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # list.remove() raises ValueError for an unknown entry; the
        # remote directory is already gone by now, so there is nothing
        # useful to report in that case.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def delete_all_tmp_dirs(self, parent='/tmp'):
        """
        Delete all directories in parent that were created by get_tmp_dir

        Note that this may involve deleting directories created by calls to
        get_tmp_dir on a different RemoteHost instance than the one running this
        method. Only perform this operation when certain that this will not
        cause unexpected behavior.

        @param parent A parent directory, or a list/tuple of them.
        """
        if isinstance(parent, (list, tuple)):
            parents = parent
        else:
            parents = [parent]

        template = self.TMP_DIR_TEMPLATE
        # follow mktemp's behavior of only expanding 3 or more consecutive Xs
        glob_template = re.sub('XXXX*', '*', template)
        # distinguish between non-wildcard asterisks in parent directory name
        # and wildcards inserted from the template: literal pieces are
        # quoted one by one, so only the inserted asterisks stay unquoted.
        quoted_glob = '*'.join(
                ['"%s"' % utils.sh_escape(x)
                 for x in glob_template.split('*')])
        # Regex form of the template: literal text is escaped, each run
        # of Xs becomes the same number of alphanumeric characters.
        template_regex = re.sub(
                'XXXX*',
                lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(0)),
                re.escape(template))

        rm_paths = []
        for parent_dir in parents:
            rm_paths.append(
                    '"%s' % os.path.join(utils.sh_escape(parent_dir),
                                         quoted_glob[1:]))

            # remove deleted directories from tmp_dirs
            # os.path.join() yields either the template alone (absolute
            # template) or parent + separator + template; whatever comes
            # before the template is literal text and must be escaped.
            joined = os.path.join(parent_dir, template)
            literal_prefix = joined[:len(joined) - len(template)]
            # remove if matches, or is within a dir that matches
            tracked = re.compile(
                    re.escape(literal_prefix) + template_regex + '(/|$)')
            self.tmp_dirs = [x for x in self.tmp_dirs
                             if not tracked.match(x)]

        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)


    def check_uptime(self):
        """
        Read the host's uptime from /proc/uptime.

        @returns The first field of /proc/uptime, as a string.

        @raises error.AutoservHostError if the host is not up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        # NOTE(review): the positional 30 is presumably run()'s timeout
        # in seconds -- confirm against the run() signature.
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. return True if exist.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any the waitup processes are running, or if no
        waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.
        A label function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if not label:
                continue
            # six.string_types also covers unicode labels on Python 2.
            if isinstance(label, six.string_types):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels