"""This module defines the RemoteHost class."""

import logging
import os
import time
import urllib

from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel used as the default `label` of reboot(); it is compared by
    # identity and never handed to the bootloader.
    LAST_BOOT_TAG = object()
    # Default `timeout` handed to wait_down() by halt().
    # NOTE(review): presumably seconds -- confirm against wait_down().
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as f(host) by get_labels(); each may return a
    # string, a list of strings, or a false value.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Where job_start() copies /var/log/messages to on the remote host.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the per-host state used by the other methods.

        Args:
            hostname - Name of the remote host; used by __repr__, by the
                    host keyval lookups and by the ping in suspend().
            autodir - Value later returned by get_autodir(); may be None.
            *args, **dargs - Passed through to the parent _initialize().
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote directories handed out by get_tmp_dir(); removed in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: run the parent close(), stop the loggers and
        remove every temporary directory created through get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is looked up defensively: the attribute is only set in
        # _initialize(), so it may be absent on a partially built object.
        for tmp_dir in getattr(self, 'tmp_dirs', []):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError:
                # Best-effort cleanup; a failed removal must not stop the
                # remaining directories from being removed.
                pass


    def job_start(self):
        """
        Called the first time a remote host object is created for a
        specific host after a job starts. This implementation saves a
        copy of /var/log/messages (when it exists) to
        VAR_LOG_MESSAGES_COPY_PATH on the remote host.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Start a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Halt the remote host.

        Args:
            timeout - Passed to wait_down() when `wait` is true.
            wait - Should we wait for the machine to go down.
        """
        # The short sleep lets the background command be launched before
        # halt takes effect.
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, fastsync=False,
               reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                label - The label we should boot into. If it's
                        LAST_BOOT_TAG, we'll boot into whichever kernel
                        was .boot'ed last (or the default kernel if we
                        haven't .boot'ed in this job, or there is no
                        job). If it's None, we'll boot into the default
                        kernel. If it's something else, we'll boot into
                        that.
                kernel_args - Extra arguments added to the booted label
                        through the bootloader, if given.
                wait - Should we wait to see if the machine comes back up.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
                **dargs - Passed to reboot_setup() and, when waiting, to
                        wait_for_restart().
        """
        if label is self.LAST_BOOT_TAG:
            # Resolve the sentinel. Without a job there is no remembered
            # tag, so fall back to None (default kernel) instead of
            # leaking the sentinel object into the bootloader calls below.
            label = self.job.last_boot_tag if self.job else None
        elif self.job:
            self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            if not label:
                label = self.bootloader.get_default_title()
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        if not reboot_cmd:
            # Escalating sequence: each step only matters if the machine
            # is still running once the preceding sleep has elapsed.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend.
                suspend_cmd - Suspend command to execute.
                **dargs - Accepted but not used by this implementation.

        Raises:
                error.AutoservSuspendError - if the suspend command fails,
                        if the host does not answer a ping once the wait
                        loop has run out, or if the whole operation
                        finished in less than `timeout`.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once the expected wake-up is near.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        if (lasted < timeout):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup(), then notify the job's profilers
        of the reboot when this host is attached to a job.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup(), then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is created under `parent` (which is created first
        if needed) and recorded in self.tmp_dirs, so that close()
        deletes it together with its content.
        """
        # Quote the paths the same way close() and delete_tmp_dir() do, so
        # a parent containing spaces or shell metacharacters is handled.
        self.run('mkdir -p "%s"' % utils.sh_escape(parent))
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run('mktemp -d "%s"' %
                            utils.sh_escape(template)).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Return the keyvals read from <job.resultdir>/host_keyvals/<hostname>,
        or an empty dict when this host is not attached to a job.
        """
        if not self.job:
            return {}
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        return self._read_host_keyvals().get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        The 'labels' keyval is a comma separated list of URL-quoted
        labels; each one is unquoted before being returned.
        """
        all_labels = self._read_host_keyvals().get('labels', '')
        if not all_labels:
            return []
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The directory is also dropped from self.tmp_dirs when it is
        tracked there; an untracked directory is still deleted.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first field of /proc/uptime, as a string.
        Raises error.AutoservHostError if is_up() reports the host down.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        # NOTE(review): the positional 30 is presumably run()'s timeout --
        # confirm against the run() signature.
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if
        no waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Falls back to plain `ps` when `ps -e` fails.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function that raises is logged and skipped. A function may
        return a single string, a list of strings, or a false value for
        "no label"; any other return value is ignored.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                # logging.exception records the message with the traceback
                # of the exception being handled.
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                label = None
            if not label:
                continue
            if isinstance(label, str):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels