blob: 67d3d93b478344aa8226535d80528b38e77d5e7c [file] [log] [blame]
"""This module defines the RemoteHost class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07003import os, logging, urllib, time
mblighf2c33762008-10-18 14:42:34 +00004from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00005from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07006from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00007
8
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel used as the default `label` of reboot(); it means "reuse the
    # label recorded on the job by the previous reboot".
    LAST_BOOT_TAG = object()
    # Default `timeout` handed to wait_down() by halt().
    DEFAULT_HALT_TIMEOUT = 2 * 60

    # Remote path where job_start() stores its copy of /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"

    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the per-host state.

        Args:
            hostname - Name of the remote machine; also used by __repr__,
                    suspend() and the host keyval lookups.
            autodir - Autotest install directory on the host, if known.
            *args, **dargs - Forwarded to the parent class _initialize().
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote temporary directories handed out by get_tmp_dir(); they are
        # removed again in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: run the parent close(), stop the loggers and remove
        every temporary directory created through get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs only exists once _initialize() has run.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Best-effort cleanup: a failed removal must not prevent
                    # the remaining directories from being cleaned up.
                    pass


    def job_start(self):
        """
        Called the first time a remote host object is created for a
        specific host after a job starts.

        Saves a copy of the host's /var/log/messages to
        VAR_LOG_MESSAGES_COPY_PATH. A failure to do so is logged and
        otherwise ignored.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            self.run('rm -f %s' % self.VAR_LOG_MESSAGES_COPY_PATH)
            self.run('cp /var/log/messages %s' %
                     self.VAR_LOG_MESSAGES_COPY_PATH)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of where
        autotest is installed. Called in server/autotest.py after a
        successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot on the host by writing 'b' to sysrq-trigger."""
        self.run('echo b > /proc/sysrq-trigger &')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Halt the remote host.

        Args:
            timeout - Passed to wait_down() when wait is true.
            wait - Whether to call wait_down() after issuing the halt.
        """
        self.run('/sbin/halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, fastsync=False,
               reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
            timeout - How long to wait for the reboot.
            label - The label we should boot into. If it's LAST_BOOT_TAG,
                    we'll boot into whichever kernel was .boot'ed last
                    (or the default kernel if we haven't .boot'ed in this
                    job). If it's None, we'll boot into the default kernel.
                    If it's something else, we'll boot into that.
            kernel_args - Extra kernel arguments added to the boot entry
                    for this reboot, if given.
            wait - Should we wait to see if the machine comes back up.
            fastsync - Don't wait for the sync to complete, just start one
                    and move on. This is for cases where rebooting promptly
                    is more important than data integrity and/or the
                    machine may have disks that cause sync to never return.
            reboot_cmd - Reboot command to execute.
            **dargs - Forwarded to reboot_setup() and wait_for_restart().

        Raises:
            error.AutoservRunError - if the reboot command itself fails.
        """
        if self.job:
            if label is self.LAST_BOOT_TAG:
                label = self.job.last_boot_tag
            else:
                self.job.last_boot_tag = label
        elif label is self.LAST_BOOT_TAG:
            # Without a job there is no recorded last boot label, so fall
            # back to the default kernel instead of handing the sentinel
            # object to reboot_setup() and the bootloader.
            label = None

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            if not label:
                label = self.bootloader.get_default_title()
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                # Captured before the reboot and handed to wait_for_restart()
                # as old_boot_id.
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout,
                             ignore_status=True)

                if reboot_cmd:
                    self.run(reboot_cmd)
                else:
                    # Try several methods of rebooting in increasing
                    # harshness.
                    self.run('(('
                             ' sync &'
                             ' sleep 5; reboot &'
                             ' sleep 60; reboot -f &'
                             ' sleep 10; reboot -nf &'
                             ' sleep 10; telinit 6 &'
                             ') </dev/null >/dev/null 2>&1 &)')
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
            timeout - How long to wait for the suspend.
            suspend_cmd - suspend command to execute.
            **dargs - Accepted but not used by this implementation.

        Raises:
            error.AutoservSuspendError - if the suspend command fails, if
                    the host does not answer a ping once the wait is over,
                    or if the whole operation took less than `timeout`.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            self.record("GOOD", None,
                        "suspend.start for %d seconds" % (timeout))
            try:
                self.run(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once the expected wake-up is close.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        # A host that answers again too early did not stay suspended for
        # the requested duration.
        if (lasted < timeout):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup() and, when a job is attached, let
        the job's profilers handle the reboot.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup() and then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is created under `parent` (which is created first if
        needed) and is remembered in self.tmp_dirs; close() removes every
        directory recorded there.
        """
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read this host's keyval file from the job results directory.

        Returns the value of utils.read_keyval() for
        <resultdir>/host_keyvals/<hostname>, or None when no job is set.
        """
        if not self.job:
            return None
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return None
        return keyvals.get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        The 'labels' keyval is a comma separated list whose entries are
        URL-unquoted before being returned.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return []
        all_labels = keyvals.get('labels', '')
        if not all_labels:
            return []
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        The directory is also dropped from self.tmp_dirs when it is listed
        there; a path that was never registered is removed remotely without
        raising.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # list.remove() raises ValueError for a missing item; the remote
        # directory is already gone at this point, so only forget the path
        # if we were actually tracking it.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first field of /proc/uptime, as a string.

        Raises:
            error.AutoservHostError - if is_up() reports the host as down.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if no
        waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to a bare `ps` when `ps -e` fails on the host.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False