blob: 492c3d437cdc5d47358e51b24d66de04626c9cf1 [file] [log] [blame]
"""This module defines the RemoteHost class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Andrey Ulanovad472902016-01-11 17:31:18 -08003import os, logging, urllib, time
Alex Khouderchahc44e7772018-07-16 10:53:14 -07004import re
mblighf2c33762008-10-18 14:42:34 +00005from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00006from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07007from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00008
9
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    # Default reboot timeout, taken unchanged from the base Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time halt() waits for the host to go down, in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables that get_labels() invokes with the host as sole argument;
    # each returns a label string, a list of labels, or a false value.
    _LABEL_FUNCTIONS = []
    # NOTE(review): not referenced in the visible part of this file;
    # presumably filled in by subclasses -- confirm against callers.
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
    # mktemp(1) template used by get_tmp_dir() and delete_all_tmp_dirs().
    TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX'
jadmanski4900b3b2009-07-02 22:12:08 +000033
Kevin Cheng3a4a57a2015-09-30 12:09:50 -070034
def _initialize(self, hostname, autodir=None, *args, **dargs):
    """
    Set up the state that is specific to a remote host.

    @param hostname Name of the remote machine; stored as self.hostname.
    @param autodir Directory where autotest is installed on the host, if
            already known; stored as self.autodir.
    @param args Extra positional arguments, forwarded to the parent
            class's _initialize().
    @param dargs Extra keyword arguments, forwarded to the parent
            class's _initialize().
    """
    super(RemoteHost, self)._initialize(*args, **dargs)

    # Directories handed out by get_tmp_dir(); close() removes them.
    self.tmp_dirs = []
    self.autodir = autodir
    self.hostname = hostname
jadmanskia2db9412008-08-22 21:47:24 +000041
42
def __repr__(self):
    """Return a short description that names the remote host."""
    description = "<remote host: %s>" % self.hostname
    return description
45
46
def close(self):
    """
    Release the resources held for this host.

    Closes the parent class, stops the loggers, and then makes a
    best-effort attempt to remove every directory recorded in
    self.tmp_dirs from the remote machine.  A directory that cannot be
    removed is logged and otherwise ignored.
    """
    super(RemoteHost, self).close()
    self.stop_loggers()

    # tmp_dirs may be absent (presumably when _initialize() never ran),
    # in which case there is nothing to clean up.
    for tmp_dir in getattr(self, 'tmp_dirs', ()):
        try:
            self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
        except error.AutoservRunError as e:
            # Cleanup is best effort: never fail close() over a leftover
            # directory, but leave a trace for debugging.
            logging.debug('Failed to remove temporary directory %s: %s',
                          tmp_dir, e)
58
59
def job_start(self):
    """
    Hook called the first time a remote host object is created for a
    specific host after a job starts.

    This method depends on the create_host factory being used to
    construct your host object. If you directly construct host objects
    you will need to call this method yourself (and enforce the
    single-call rule).

    This implementation copies /var/log/messages, when it exists, to
    VAR_LOG_MESSAGES_COPY_PATH on the host.  Any failure is logged and
    otherwise ignored.
    """
    try:
        cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
               '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
        self.run(cmd)
    except Exception as e:
        # Non-fatal error
        logging.info('Failed to copy /var/log/messages at startup: %s', e)
jadmanskid60321a2008-10-28 20:32:05 +000077
78
def get_autodir(self):
    """Return the autotest directory recorded for this host (may be None)."""
    autodir = self.autodir
    return autodir
81
82
def set_autodir(self, autodir):
    """
    Record where autotest is installed on the host.

    Called in server/autotest.py after a successful install.

    @param autodir The autotest installation directory.
    """
    setattr(self, 'autodir', autodir)
90
91
def sysrq_reboot(self):
    """
    Write 'b' to /proc/sysrq-trigger on the host, as a background
    command, to trigger a SysRq reboot.
    """
    sysrq_cmd = 'echo b > /proc/sysrq-trigger'
    self.run_background(sysrq_cmd)
mblighf2c33762008-10-18 14:42:34 +000095
96
def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
    """
    Shut down the remote host.

    N.B. This method makes no provision to bring the target back
    up.  The target will be offline indefinitely if there's no
    independent hardware (servo, RPM, etc.) to force the target to
    power on.

    @param timeout  Maximum time to wait for host down, in seconds.
    @param wait  Whether to wait for the host to go offline.
    """
    halt_cmd = 'sleep 1 ; halt'
    self.run_background(halt_cmd)
    if not wait:
        return
    self.wait_down(timeout=timeout)
112
113
def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
           fastsync=False, reboot_cmd=None, **dargs):
    """
    Reboot the remote host.

    Args:
            timeout - How long to wait for the reboot.
            wait - Should we wait to see if the machine comes back up.
                   If this is set to True, an error from reboot_cmd is
                   ignored (provided the boot id could be read first).
            fastsync - Don't wait for the sync to complete, just start one
                    and move on. This is for cases where rebooting promptly
                    is more important than data integrity and/or the
                    machine may have disks that cause sync to never return.
            reboot_cmd - Reboot command to execute.
            dargs - Extra keyword arguments, passed to reboot_setup() and
                    to wait_for_restart().
    """
    self.reboot_setup(**dargs)
    if not reboot_cmd:
        # Default: a chain of increasingly forceful reboot attempts.
        reboot_cmd = '; '.join(['sync & sleep 5',
                                'reboot & sleep 60',
                                'reboot -f & sleep 10',
                                'reboot -nf & sleep 10',
                                'telinit 6'])

    def _issue_reboot():
        """Record the start, send the reboot command, optionally wait."""
        self.record("GOOD", None, "reboot.start")
        boot_id_before = None
        try:
            boot_id_before = self.get_boot_id()

            # sync before starting the reboot, so that a long sync during
            # shutdown isn't timed out by wait_down's short timeout
            if not fastsync:
                self.run('sync; sync', timeout=timeout, ignore_status=True)

            self.run_background(reboot_cmd)
        except error.AutoservRunError:
            # If wait is set, ignore the error here and rely on
            # wait_for_restart() for stability instead.  reboot_cmd
            # sometimes reports an error even though the reboot is
            # successfully in progress; that is hard to avoid because we
            # have little control over the remote machine once "reboot"
            # starts.
            error_is_tolerable = wait and boot_id_before is not None
            if not error_is_tolerable:
                # TODO(b/37652392): Revisit no-wait case, later.
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
        if wait:
            self.wait_for_restart(timeout, old_boot_id=boot_id_before,
                                  **dargs)

    # if this is a full reboot-and-wait, run the reboot inside a group
    if not wait:
        _issue_reboot()
        return
    self.log_op(self.OP_REBOOT, _issue_reboot)
172
def suspend(self, timeout, suspend_cmd,
            allow_early_resume=False):
    """
    Suspend the remote host.

    Args:
            timeout - How long to wait for the suspend in integer seconds.
            suspend_cmd - suspend command to execute.
            allow_early_resume - Boolean that indicates whether a resume
                                 before |timeout| is ok.
    Raises:
            error.AutoservSuspendError - If the suspend command fails, if
                                         the device does not answer ping
                                         after the wait, or if
                                         |allow_early_resume| is False
                                         and the device resumes before
                                         |timeout|.
    """
    # define a function for the suspend and run it in a group
    def _suspend_and_wait():
        """Send the suspend command, then poll until the host pings."""
        self.record("GOOD", None, "suspend.start for %d seconds" % timeout)
        try:
            self.run_background(suspend_cmd)
        except error.AutoservRunError:
            self.record("ABORT", None, "suspend.start",
                        "suspend command failed")
            raise error.AutoservSuspendError("suspend command failed")

        # Wait for some time, to ensure the machine is going to sleep.
        # Not too long to check if the machine really suspended.
        poll_interval = min(timeout / 2, 300)
        time.sleep(poll_interval)
        elapsed = poll_interval
        while elapsed < timeout + 60:
            # Check if the machine is back. We check regularly to
            # ensure the machine was suspended long enough.
            if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                return
            # Poll more often once the expected resume time is near.
            if elapsed > timeout - 10:
                poll_interval = 5
            time.sleep(poll_interval)
            elapsed += poll_interval

        if utils.ping(self.hostname, tries=1, deadline=1) != 0:
            raise error.AutoservSuspendError(
                "DUT is not responding after %d seconds" % elapsed)

    started_at = time.time()
    self.log_op(self.OP_SUSPEND, _suspend_and_wait)
    lasted = time.time() - started_at
    logging.info("Device resumed after %d secs", lasted)
    resumed_too_early = lasted < timeout
    if resumed_too_early and not allow_early_resume:
        raise error.AutoservSuspendError(
            "Suspend did not last long enough: %d instead of %d" % (
                lasted, timeout))
mblighf2c33762008-10-18 14:42:34 +0000227
def reboot_followup(self, *args, **dargs):
    """
    Run the parent class's post-reboot steps, then, when a job is
    attached to this host, tell the job's profilers about the reboot.
    """
    super(RemoteHost, self).reboot_followup(*args, **dargs)
    if not self.job:
        return
    self.job.profilers.handle_reboot(self)
233
234
def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
    """
    Wait for the host to come back from a reboot.

    Wraps the generic wait_for_restart implementation of the parent
    class in a reboot group via log_op().

    @param timeout Passed through to the parent implementation.
    @param dargs Extra keyword arguments for the parent implementation.
    """
    def _wait_in_group():
        """Delegate to the parent class's wait_for_restart()."""
        super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)

    self.log_op(self.OP_REBOOT, _wait_in_group)
mblighf2c33762008-10-18 14:42:34 +0000244
245
def cleanup(self):
    """
    Run the parent class's cleanup, then reboot the host using the
    default reboot() arguments.
    """
    super(RemoteHost, self).cleanup()
    self.reboot()
250
251
def get_tmp_dir(self, parent='/tmp'):
    """
    Return the pathname of a directory on the host suitable
    for temporary file storage.

    The directory and its content will be deleted automatically
    on the destruction of the Host object that was used to obtain
    it.

    @param parent Directory to create the temporary directory under.
            When TMP_DIR_TEMPLATE is an absolute path, os.path.join()
            discards this value and the template's own directory is used.
    @return Path of the newly created directory on the remote host.
    """
    template = os.path.join(parent, self.TMP_DIR_TEMPLATE)
    # Create the directory mktemp will really use.  With an absolute
    # TMP_DIR_TEMPLATE that is not `parent`, so creating `parent` alone
    # would leave mktemp without its target directory.
    template_dir = os.path.dirname(template)
    result = self.run('mkdir -p %s && mktemp -d %s'
                      % (template_dir, template))
    dir_name = result.stdout.rstrip()
    # Remember the directory so close() can remove it later.
    self.tmp_dirs.append(dir_name)
    return dir_name
265
266
def get_platform_label(self):
    """
    Return the platform label, or None if platform label is not set.

    The label is the 'platform' entry of this host's keyval file under
    the job's result directory; without a job the result is None.
    """
    if not self.job:
        return None

    keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                               self.hostname)
    host_keyvals = utils.read_keyval(keyval_path)
    return host_keyvals.get('platform', None)
279
280
def get_all_labels(self):
    """
    Return all labels, or empty list if label is not set.

    Labels come from the comma-separated, URL-quoted 'labels' entry of
    this host's keyval file under the job's result directory.
    """
    if not self.job:
        return []

    keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                               self.hostname)
    quoted_labels = utils.read_keyval(keyval_path).get('labels', '')
    if not quoted_labels:
        return []
    return [urllib.unquote(quoted)
            for quoted in quoted_labels.split(',')]
294
295
def delete_tmp_dir(self, tmpdir):
    """
    Delete the given temporary directory on the remote machine.

    The removal is best effort (the exit status of rm is ignored).  The
    directory is also dropped from self.tmp_dirs when it is tracked
    there; an untracked directory is deleted without raising.

    @param tmpdir The directory to delete.
    """
    self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
    # Guard the removal: list.remove() raises ValueError for a path that
    # is not tracked (e.g. already deleted, or not from get_tmp_dir()).
    if tmpdir in self.tmp_dirs:
        self.tmp_dirs.remove(tmpdir)
304
305
def delete_all_tmp_dirs(self, parent='/tmp'):
    """
    Delete all directories in parent that were created by get_tmp_dir

    Note that this may involve deleting directories created by calls to
    get_tmp_dir on a different RemoteHost instance than the one running this
    method. Only perform this operation when certain that this will not
    cause unexpected behavior.

    @param parent A directory, or a list/tuple of directories, to clean.
    """
    if isinstance(parent, (list, tuple)):
        parents = parent
    else:
        parents = [parent]

    # follow mktemp's behavior of only expanding 3 or more consecutive Xs
    glob_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE)
    # distinguish between non-wildcard asterisks in parent directory name
    # and wildcards inserted from the template: every literal piece is
    # quoted, the wildcards between the pieces stay unquoted
    quoted_glob = '*'.join('"%s"' % utils.sh_escape(piece)
                           for piece in glob_template.split('*'))
    # same template as a regex: each run of Xs becomes that many
    # alphanumeric characters
    name_regex = re.sub(
            '(XXXX*)',
            lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)),
            self.TMP_DIR_TEMPLATE)

    rm_paths = []
    for parent_dir in parents:
        # quoted_glob starts with a '"'; drop it and re-open the quote in
        # front of the escaped parent directory
        rm_paths.append('"%s' % os.path.join(utils.sh_escape(parent_dir),
                                             quoted_glob[1:]))
        # remove deleted directories from tmp_dirs
        # NOTE(review): parent_dir is used as a regex without re.escape(),
        # so regex metacharacters in it are not matched literally.
        regex = os.path.join(parent_dir, name_regex)
        regex += '(/|$)'  # remove if matches, or is within a dir that matches
        # Build a real list (not a filter object) so later append()/remove()
        # calls on tmp_dirs keep working.
        self.tmp_dirs = [tmp_dir for tmp_dir in self.tmp_dirs
                         if not re.match(regex, tmp_dir)]

    if rm_paths:
        self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True)
Alex Khouderchahc44e7772018-07-16 10:53:14 -0700337
def check_uptime(self):
    """
    Return the host's uptime as read from /proc/uptime.

    @return The first whitespace-separated field of /proc/uptime, as a
            string.
    @raises error.AutoservHostError if the host does not appear to be up.
    """
    if not self.is_up():
        raise error.AutoservHostError('Client does not appear to be up')
    uptime_output = self.run("/bin/cat /proc/uptime", 30).stdout
    uptime_fields = uptime_output.strip().split()
    return uptime_fields[0]
346
347
def check_for_lkdtm(self):
    """
    Check for the kernel dump test module.

    @return True if /sys/kernel/debug/provoke-crash/DIRECT can be listed
            on the host, False otherwise.
    """
    listing = self.run('ls /sys/kernel/debug/provoke-crash/DIRECT',
                       ignore_status=True)
    return listing.exit_status == 0
354
355
def are_wait_up_processes_up(self):
    """
    Checks if any HOSTS waitup processes are running yet on the
    remote host.

    @return True if no waitup processes are configured, or if at least
            one of them shows up in the host's process list; False
            otherwise.
    """
    processes = self.get_wait_up_processes()
    if not processes:
        return True  # wait up processes aren't being used
    # any() stops at the first process that grep finds, so no further
    # remote commands are issued after a hit.
    return any(
        self.run("{ ps -e || ps; } | grep '%s'" % procname,
                 ignore_status=True).exit_status == 0
        for procname in processes)
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700373
374
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions.

    Each function in _LABEL_FUNCTIONS is called with this host.  A string
    result is added as one label and a list result is added element by
    element; false results and results of any other type are skipped.  A
    function that raises is logged and ignored.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception:
            # Not every callable has __name__ (e.g. functools.partial);
            # fall back to repr() so the handler itself cannot raise.
            logging.exception('Label function %s failed; ignoring it.',
                              getattr(label_function, '__name__',
                                      repr(label_function)))
            continue
        if not label:
            continue
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels