blob: 0d4994f1df4451817a0c725a09381047c7031ebf [file] [log] [blame]
"""This module defines the RemoteHost class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Andrey Ulanovad472902016-01-11 17:31:18 -08003import os, logging, urllib, time
mblighf2c33762008-10-18 14:42:34 +00004from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00005from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07006from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00007
8
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    # Default reboot timeout, taken unchanged from the parent class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time, in seconds, that halt() waits for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables that get_labels() invokes as f(host) to compute labels.
    # NOTE(review): this list lives on the class, so it is shared by every
    # subclass that does not assign its own.
    _LABEL_FUNCTIONS = []
    # Not referenced in the visible part of this file; presumably filled in
    # by subclasses -- TODO confirm.
    _DETECTABLE_LABELS = []

    # Remote path where job_start() stores its copy of /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
jadmanski4900b3b2009-07-02 22:12:08 +000031
Kevin Cheng3a4a57a2015-09-30 12:09:50 -070032
jadmanskif6562912008-10-21 17:59:01 +000033 def _initialize(self, hostname, autodir=None, *args, **dargs):
34 super(RemoteHost, self)._initialize(*args, **dargs)
mbligh321b1f52008-04-09 16:23:43 +000035
jadmanski1c5e3a12008-08-15 23:08:20 +000036 self.hostname = hostname
mblighf2c33762008-10-18 14:42:34 +000037 self.autodir = autodir
38 self.tmp_dirs = []
jadmanskia2db9412008-08-22 21:47:24 +000039
40
    def __repr__(self):
        """Return a short description that identifies the host by name."""
        return "<remote host: %s>" % self.hostname
43
44
jadmanski53aaf382008-11-17 16:22:31 +000045 def close(self):
46 super(RemoteHost, self).close()
mblighf2c33762008-10-18 14:42:34 +000047 self.stop_loggers()
48
49 if hasattr(self, 'tmp_dirs'):
50 for dir in self.tmp_dirs:
51 try:
Allen Liad719c12017-06-27 23:48:04 +000052 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
mblighf2c33762008-10-18 14:42:34 +000053 except error.AutoservRunError:
54 pass
55
56
jadmanskid60321a2008-10-28 20:32:05 +000057 def job_start(self):
58 """
59 Abstract method, called the first time a remote host object
60 is created for a specific host after a job starts.
61
62 This method depends on the create_host factory being used to
63 construct your host object. If you directly construct host objects
64 you will need to call this method yourself (and enforce the
65 single-call rule).
66 """
jadmanski4900b3b2009-07-02 22:12:08 +000067 try:
Andrey Ulanovad472902016-01-11 17:31:18 -080068 cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
69 '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
Allen Liad719c12017-06-27 23:48:04 +000070 self.run(cmd)
jadmanski4900b3b2009-07-02 22:12:08 +000071 except Exception, e:
72 # Non-fatal error
73 logging.info('Failed to copy /var/log/messages at startup: %s', e)
jadmanskid60321a2008-10-28 20:32:05 +000074
75
    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir
78
79
    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of where
        autotest is installed. Called in server/autotest.py after a
        successful install.

        @param autodir The autotest directory to record for this host.
        """
        self.autodir = autodir
87
88
    def sysrq_reboot(self):
        """
        Reboot the host by writing 'b' to /proc/sysrq-trigger.

        The command is started with run_background().
        """
        self.run_background('echo b > /proc/sysrq-trigger')
mblighf2c33762008-10-18 14:42:34 +000091
92
Eric Li6f27d4f2010-09-29 10:55:17 -070093 def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
Richard Barnetteab9769f2016-06-01 15:01:44 -070094 """
95 Shut down the remote host.
96
97 N.B. This method makes no provision to bring the target back
98 up. The target will be offline indefinitely if there's no
99 independent hardware (servo, RPM, etc.) to force the target to
100 power on.
101
102 @param timeout Maximum time to wait for host down, in seconds.
103 @param wait Whether to wait for the host to go offline.
104 """
J. Richard Barnette9af19632015-09-25 12:18:03 -0700105 self.run_background('sleep 1 ; halt')
Eric Li6f27d4f2010-09-29 10:55:17 -0700106 if wait:
107 self.wait_down(timeout=timeout)
108
109
Richard Barnetteab9769f2016-06-01 15:01:44 -0700110 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
111 fastsync=False, reboot_cmd=None, **dargs):
mblighf2c33762008-10-18 14:42:34 +0000112 """
113 Reboot the remote host.
114
115 Args:
116 timeout - How long to wait for the reboot.
mblighf2c33762008-10-18 14:42:34 +0000117 wait - Should we wait to see if the machine comes back up.
Hidehiko Abe3d512d32017-04-27 15:11:33 +0900118 If this is set to True, ignores reboot_cmd's error
119 even if occurs.
mbligh2b949772009-02-26 00:59:36 +0000120 fastsync - Don't wait for the sync to complete, just start one
121 and move on. This is for cases where rebooting prompty
122 is more important than data integrity and/or the
123 machine may have disks that cause sync to never return.
mbligh959ed872009-04-17 22:18:25 +0000124 reboot_cmd - Reboot command to execute.
mblighf2c33762008-10-18 14:42:34 +0000125 """
Richard Barnetteab9769f2016-06-01 15:01:44 -0700126 self.reboot_setup(**dargs)
J. Richard Barnette9af19632015-09-25 12:18:03 -0700127 if not reboot_cmd:
128 reboot_cmd = ('sync & sleep 5; '
129 'reboot & sleep 60; '
130 'reboot -f & sleep 10; '
131 'reboot -nf & sleep 10; '
132 'telinit 6')
133
mblighf2c33762008-10-18 14:42:34 +0000134 def reboot():
Richard Barnetteab9769f2016-06-01 15:01:44 -0700135 # pylint: disable=missing-docstring
mblighf2c33762008-10-18 14:42:34 +0000136 self.record("GOOD", None, "reboot.start")
137 try:
jadmanskic0354912010-01-12 15:57:29 +0000138 current_boot_id = self.get_boot_id()
139
jadmanskid544a352009-01-14 23:36:28 +0000140 # sync before starting the reboot, so that a long sync during
141 # shutdown isn't timed out by wait_down's short timeout
mbligh2b949772009-02-26 00:59:36 +0000142 if not fastsync:
mbligh959ed872009-04-17 22:18:25 +0000143 self.run('sync; sync', timeout=timeout, ignore_status=True)
jadmanskid544a352009-01-14 23:36:28 +0000144
J. Richard Barnette9af19632015-09-25 12:18:03 -0700145 self.run_background(reboot_cmd)
mblighf2c33762008-10-18 14:42:34 +0000146 except error.AutoservRunError:
Hidehiko Abe3d512d32017-04-27 15:11:33 +0900147 # If wait is set, ignore the error here, and rely on the
148 # wait_for_restart() for stability, instead.
149 # reboot_cmd sometimes causes an error even if reboot is
150 # successfully in progress. This is difficult to be avoided,
151 # because we have no much control on remote machine after
152 # "reboot" starts.
153 if not wait:
154 # TODO(b/37652392): Revisit no-wait case, later.
155 self.record("ABORT", None, "reboot.start",
156 "reboot command failed")
157 raise
mblighf2c33762008-10-18 14:42:34 +0000158 if wait:
jadmanskic0354912010-01-12 15:57:29 +0000159 self.wait_for_restart(timeout, old_boot_id=current_boot_id,
160 **dargs)
mblighf2c33762008-10-18 14:42:34 +0000161
162 # if this is a full reboot-and-wait, run the reboot inside a group
163 if wait:
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -0700164 self.log_op(self.OP_REBOOT, reboot)
mblighf2c33762008-10-18 14:42:34 +0000165 else:
166 reboot()
167
    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host and check that it stayed suspended.

        Args:
                timeout - How long to wait for the suspend, in seconds.
                suspend_cmd - suspend command to execute.
                dargs - Accepted but not used by this implementation.

        Raises error.AutoservSuspendError if the suspend command fails, if
        the host still does not answer a ping once the polling window is
        over, or if the whole operation took less than timeout seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            # Poll until up to 60 seconds past the requested timeout.
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                # A ping result of 0 is treated as "host is back".
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Close to the deadline, poll every 5 seconds.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            # One last attempt once the polling window is exhausted.
            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        # A host that came back before timeout elapsed counts as a failure.
        if (lasted < timeout):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))
mblighf2c33762008-10-18 14:42:34 +0000214
jadmanski4f909252008-12-01 20:47:10 +0000215 def reboot_followup(self, *args, **dargs):
216 super(RemoteHost, self).reboot_followup(*args, **dargs)
217 if self.job:
218 self.job.profilers.handle_reboot(self)
219
220
    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout Passed to the parent wait_for_restart().
        @param dargs Extra keyword arguments passed to the parent
                wait_for_restart().
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        # Run the wait through log_op() so it is grouped under OP_REBOOT.
        self.log_op(self.OP_REBOOT, op_func)
mblighf2c33762008-10-18 14:42:34 +0000230
231
    def cleanup(self):
        """
        Run the parent class cleanup, then reboot the host with the
        default reboot() arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()
235
236
mblighe48bcfb2008-11-11 17:09:44 +0000237 def get_tmp_dir(self, parent='/tmp'):
mblighf2c33762008-10-18 14:42:34 +0000238 """
239 Return the pathname of a directory on the host suitable
240 for temporary file storage.
241
242 The directory and its content will be deleted automatically
243 on the destruction of the Host object that was used to obtain
244 it.
245 """
Allen Liad719c12017-06-27 23:48:04 +0000246 self.run("mkdir -p %s" % parent)
mblighe48bcfb2008-11-11 17:09:44 +0000247 template = os.path.join(parent, 'autoserv-XXXXXX')
Allen Liad719c12017-06-27 23:48:04 +0000248 dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
mblighf2c33762008-10-18 14:42:34 +0000249 self.tmp_dirs.append(dir_name)
250 return dir_name
251
252
mbligh6b95b522010-02-19 19:17:41 +0000253 def get_platform_label(self):
254 """
255 Return the platform label, or None if platform label is not set.
256 """
257
258 if self.job:
259 keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
260 self.hostname)
261 keyvals = utils.read_keyval(keyval_path)
262 return keyvals.get('platform', None)
263 else:
264 return None
265
266
Eric Li6f27d4f2010-09-29 10:55:17 -0700267 def get_all_labels(self):
268 """
269 Return all labels, or empty list if label is not set.
270 """
271 if self.job:
272 keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
273 self.hostname)
274 keyvals = utils.read_keyval(keyval_path)
275 all_labels = keyvals.get('labels', '')
276 if all_labels:
Eric Li861b2d52011-02-04 14:50:35 -0800277 all_labels = all_labels.split(',')
278 return [urllib.unquote(label) for label in all_labels]
Eric Li6f27d4f2010-09-29 10:55:17 -0700279 return []
280
281
jadmanskiea455662009-03-25 22:25:39 +0000282 def delete_tmp_dir(self, tmpdir):
283 """
284 Delete the given temporary directory on the remote machine.
Richard Barnetteab9769f2016-06-01 15:01:44 -0700285
286 @param tmpdir The directory to delete.
jadmanskiea455662009-03-25 22:25:39 +0000287 """
288 self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
289 self.tmp_dirs.remove(tmpdir)
290
291
mblighf2c33762008-10-18 14:42:34 +0000292 def check_uptime(self):
293 """
294 Check that uptime is available and monotonically increasing.
295 """
mbligha43f6d22009-08-24 22:09:44 +0000296 if not self.is_up():
297 raise error.AutoservHostError('Client does not appear to be up')
mblighf2c33762008-10-18 14:42:34 +0000298 result = self.run("/bin/cat /proc/uptime", 30)
299 return result.stdout.strip().split()[0]
300
301
Puthikorn Voravootivatdf29d642014-04-25 11:45:36 -0700302 def check_for_lkdtm(self):
303 """
304 Check for kernel dump test module. return True if exist.
305 """
306 cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
307 return self.run(cmd, ignore_status=True).exit_status == 0
308
309
jadmanskica7da372008-10-21 16:26:52 +0000310 def are_wait_up_processes_up(self):
mblighf2c33762008-10-18 14:42:34 +0000311 """
312 Checks if any HOSTS waitup processes are running yet on the
313 remote host.
314
315 Returns True if any the waitup processes are running, False
316 otherwise.
317 """
318 processes = self.get_wait_up_processes()
319 if len(processes) == 0:
320 return True # wait up processes aren't being used
321 for procname in processes:
322 exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
323 ignore_status=True).exit_status
324 if exit_status == 0:
325 return True
326 return False
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700327
328
329 def get_labels(self):
330 """Return a list of labels for this given host.
331
332 This is the main way to retrieve all the automatic labels for a host
333 as it will run through all the currently implemented label functions.
334 """
335 labels = []
336 for label_function in self._LABEL_FUNCTIONS:
337 try:
338 label = label_function(self)
Aviv Keshet5ae0a002017-05-05 10:23:33 -0700339 except Exception:
340 logging.exception('Label function %s failed; ignoring it.',
341 label_function.__name__)
Kevin Cheng3a4a57a2015-09-30 12:09:50 -0700342 label = None
343 if label:
344 if type(label) is str:
345 labels.append(label)
346 elif type(label) is list:
347 labels.extend(label)
348 return labels