blob: 2c5dcb7453ebfbcad7ea3b43bb8c0c15595f5af6 [file] [log] [blame]
Fang Deng0ca40e22013-08-27 17:47:44 -07001"""This class defines the Remote host class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Andrey Ulanovad472902016-01-11 17:31:18 -08003import os, logging, urllib, time
mblighf2c33762008-10-18 14:42:34 +00004from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00005from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07006from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00007
8
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time to wait for the host to go down in halt(), in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as label_function(host) by get_labels().
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Remote path that job_start() copies /var/log/messages to.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the host object's basic state.

        @param hostname Name of the remote host.
        @param autodir Autotest install directory on the host, if known.
        @param args Extra positional arguments passed to the parent class.
        @param dargs Extra keyword arguments passed to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote temporary directories handed out by get_tmp_dir().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: stop the loggers and remove every temporary
        directory recorded in self.tmp_dirs from the remote machine.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(), so it may be missing if
        # the object is closed before initialization completed.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                rm_cmd = 'rm -rf "%s"' % utils.sh_escape(tmp_dir)
                try:
                    self.run_very_slowly(rm_cmd)
                except error.AutoservRunError:
                    # Best-effort cleanup: a failed removal must not
                    # prevent the remaining directories from being tried.
                    pass


    def job_start(self):
        """
        Called the first time a remote host object is created for a
        specific host after a job starts.

        This implementation copies /var/log/messages (if it exists) to
        VAR_LOG_MESSAGES_COPY_PATH on the remote host. Any failure is
        logged and otherwise ignored.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run_very_slowly(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of where
        autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir The autotest install directory on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B. This method makes no provision to bring the target back
        up. The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout Maximum time to wait for host down, in seconds.
        @param wait Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        @param timeout How long to wait for the reboot.
        @param wait Should we wait to see if the machine comes back up.
                If this is set to True, ignores reboot_cmd's error
                even if it occurs.
        @param fastsync Don't wait for the sync to complete, just start one
                and move on. This is for cases where rebooting promptly
                is more important than data integrity and/or the
                machine may have disks that cause sync to never return.
        @param reboot_cmd Reboot command to execute. If not given, a
                sequence of increasingly forceful reboot commands is used.
        @param dargs Extra keyword arguments, passed to both reboot_setup()
                and wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            # Bind the name up front: if get_boot_id() itself raises
            # AutoservRunError and wait is set, the handler below swallows
            # the error and the name is still read afterwards.
            # NOTE(review): assumes wait_for_restart() treats
            # old_boot_id=None as "boot id unknown" -- confirm against the
            # parent class.
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on
                # wait_for_restart() for stability instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to avoid,
                # because we don't have much control over the remote machine
                # after "reboot" starts.
                if not wait:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        @param timeout How long to wait for the suspend, in seconds.
        @param suspend_cmd Suspend command to execute.
        @param dargs Accepted for interface compatibility; not used here.

        @raises AutoservSuspendError if the suspend command fails, if the
                host does not answer a ping once polling is over, or if
                the whole operation took less than timeout seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Close to the expected wake-up time: poll more often.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            # Polling window exhausted: give the host one last chance.
            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        if (lasted < timeout):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent class's reboot follow-up, then notify the job's
        profilers of the reboot when a job is attached to this host.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout How long to wait for the restart.
        @param dargs Extra keyword arguments for the parent implementation.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent class's cleanup and then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is created with mktemp under parent (which is
        created first if needed) and recorded in self.tmp_dirs, so that
        close() removes it together with its content.

        @param parent Remote directory in which to create the directory.
        """
        self.run_very_slowly("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run_very_slowly("mktemp -d %s"
                                        % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.

        The label is read from the 'platform' entry of this host's keyval
        file under the job's result directory; None is also returned when
        no job is attached.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            return keyvals.get('platform', None)
        else:
            return None


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are read from the comma-separated, URL-quoted 'labels'
        entry of this host's keyval file under the job's result directory.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            all_labels = keyvals.get('labels', '')
            if all_labels:
                all_labels = all_labels.split(',')
                return [urllib.unquote(label) for label in all_labels]
        return []


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.

        @raises ValueError if tmpdir is not recorded in self.tmp_dirs; the
                remote removal has already been attempted at that point.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Return the host's uptime as read from /proc/uptime.

        @returns The first whitespace-separated field of /proc/uptime,
                as a string.
        @raises AutoservHostError if the host does not appear to be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for the kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running (or if
        no waitup processes are configured), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of labels;
        falsy results and results of any other type are ignored. A label
        function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if label:
                if isinstance(label, str):
                    labels.append(label)
                elif isinstance(label, list):
                    labels.extend(label)
        return labels