blob: b6aff7922bd75f71f945157ca9629dd2323c485b [file] [log] [blame]
"""This module defines the RemoteHost class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Andrey Ulanovad472902016-01-11 17:31:18 -08003import os, logging, urllib, time
mblighf2c33762008-10-18 14:42:34 +00004from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00005from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07006from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00007
8
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked by get_labels(); each one is called with the host
    # instance as its only argument. Empty here (presumably overridden by
    # subclasses).
    _LABEL_FUNCTIONS = []
    # Not referenced anywhere in this class.
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Store the per-host state used by this class.

        @param hostname Name of the remote host; used by __repr__, by the
                ping checks in suspend() and for the keyval lookups.
        @param autodir Autotest install directory on the host, if known.
        @param args Extra positional arguments forwarded to the parent
                class's _initialize().
        @param dargs Extra keyword arguments forwarded to the parent
                class's _initialize().
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote temporary directories handed out by get_tmp_dir(); they
        # are removed in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: stop the loggers and remove every temporary
        directory that get_tmp_dir() created on the remote machine.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(), so tolerate its absence.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Best-effort cleanup: a failed removal must not
                    # prevent the remaining directories from being tried.
                    pass


    def job_start(self):
        """
        Called the first time a remote host object is created for a
        specific host after a job starts.

        This implementation snapshots /var/log/messages (when it exists)
        to VAR_LOG_MESSAGES_COPY_PATH on the remote host. Any failure is
        logged and otherwise ignored.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """
        Return the autotest install directory recorded for this host, or
        None if it has not been set.
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of where
        autotest is installed. Called in server/autotest.py after a
        successful install.

        @param autodir The autotest install directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Reboot the remote host by writing 'b' to /proc/sysrq-trigger.
        The command is started with run_background().
        """
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B. This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout Maximum time to wait for host down, in seconds.
        @param wait Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        @param timeout How long to wait for the reboot.
        @param wait Should we wait to see if the machine comes back up.
                If this is set to True, an error from the reboot command
                is ignored and wait_for_restart() decides the outcome.
        @param fastsync Don't wait for the sync to complete, just start one
                and move on. This is for cases where rebooting promptly
                is more important than data integrity and/or the
                machine may have disks that cause sync to never return.
        @param reboot_cmd Reboot command to execute. When empty, a default
                chain is used that escalates from 'reboot' through
                'reboot -f' and 'reboot -nf' to 'telinit 6'.
        @param dargs Extra keyword arguments, forwarded to reboot_setup()
                and, when waiting, to wait_for_restart().

        @raises error.AutoservRunError if the reboot command fails and
                |wait| is False.
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            # Bind the name up front: if get_boot_id() itself raises
            # AutoservRunError while wait is set, the handler below
            # swallows the error and the wait_for_restart() call would
            # otherwise die with UnboundLocalError.
            # NOTE(review): assumes wait_for_restart() treats
            # old_boot_id=None as "boot id unknown" -- confirm against
            # base_classes.Host.
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to avoid,
                # because we do not have much control over the remote
                # machine after "reboot" starts.
                if not wait:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        @param timeout How long to wait for the suspend in integer seconds.
        @param suspend_cmd Suspend command to execute.
        @param allow_early_resume Boolean that indicates whether resuming
                before |timeout| is ok.

        @raises error.AutoservSuspendError if the suspend command fails,
                if the host does not answer a ping by the end of the
                polling window, or if |allow_early_resume| is False and
                the device resumes before |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll every 5 seconds once we are within 10 seconds
                    # of the expected resume time.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            # One last probe after the polling window has elapsed.
            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent class's reboot follow-up and then, when this host
        is attached to a job, let the job's profilers handle the reboot.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout Forwarded to the parent implementation.
        @param dargs Extra keyword arguments forwarded to the parent
                implementation.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """
        Run the parent class's cleanup and then reboot the host with the
        default reboot() arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is recorded in self.tmp_dirs and removed, together
        with its content, when close() is called on this host object.

        @param parent Remote directory under which the temporary
                directory is created; it is created first if missing.

        @returns The path printed by the remote `mktemp -d`.
        """
        # NOTE(review): |parent| is interpolated unquoted, so it goes
        # through shell word splitting and expansion; paths containing
        # spaces are not supported.
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read this host's keyval file from the job's result directory.

        Callers must check that self.job is set before calling.

        @returns Whatever utils.read_keyval() returns for
                <resultdir>/host_keyvals/<hostname>.
        """
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set
        or this host is not attached to a job.
        """
        if not self.job:
            return None
        return self._read_host_keyvals().get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        The 'labels' keyval is a comma-separated list of URL-quoted
        label names; each entry is unquoted before being returned.
        """
        if not self.job:
            return []
        all_labels = self._read_host_keyvals().get('labels', '')
        if not all_labels:
            return []
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The directory is also dropped from the list of directories that
        close() cleans up. A directory that is not in that list (never
        obtained from get_tmp_dir(), or already deleted) is still removed
        remotely and is otherwise ignored.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # list.remove() raises ValueError for a missing entry; an unknown
        # directory should not turn a successful delete into a failure.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @returns The first field of /proc/uptime, as a string.

        @raises error.AutoservHostError if the host does not appear to
                be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        # NOTE(review): the second positional argument is presumably the
        # command timeout in seconds -- confirm against run().
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for the kernel dump test module.

        @returns True if /sys/kernel/debug/provoke-crash/DIRECT can be
                listed on the host, False otherwise.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        @returns True if any of the waitup processes are running, or if
                no waitup processes are configured; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to a bare `ps` when `ps -e` fails.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of strings;
        falsy results and results of any other type are skipped. A label
        function that raises is logged and ignored.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if label:
                if isinstance(label, str):
                    labels.append(label)
                elif isinstance(label, list):
                    labels.extend(label)
        return labels