blob: 0992fac49a856d7ff87668e7e8c0fd577f2ea50d [file] [log] [blame]
"""This module defines the RemoteHost class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07003import os, logging, urllib, time
mblighf2c33762008-10-18 14:42:34 +00004from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00005from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07006from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00007
8
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel default for reboot(label=...): "reuse the last booted label".
    LAST_BOOT_TAG = object()
    # Default number of seconds halt() waits for the host to go down.
    DEFAULT_HALT_TIMEOUT = 2 * 60

    # Remote path where job_start() keeps a copy of /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Initialize the remote host state.

        Args:
                hostname - Name of the remote host.
                autodir - Autotest installation directory on the host,
                        if already known.
                *args, **dargs - Forwarded to the parent _initialize().
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Temporary directories handed out by get_tmp_dir(); they are
        # removed from the host in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: stop the loggers and remove, on a best-effort
        basis, every temporary directory created through get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(), so guard against a host
        # object that was never fully initialized.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError as e:
                    # Cleanup is best-effort; keep going with the others.
                    logging.debug('Failed to remove temporary directory '
                                  '%s: %s', tmp_dir, e)


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            self.run('rm -f %s' % self.VAR_LOG_MESSAGES_COPY_PATH)
            self.run('cp /var/log/messages %s' %
                     self.VAR_LOG_MESSAGES_COPY_PATH)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest installation directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Halt the remote host.

        Args:
                timeout - How long to wait for the host to go down.
                wait - Should we wait for the host to go down.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, fastsync=False,
               reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                label - The label we should boot into. If it's
                        LAST_BOOT_TAG, we'll boot into whichever kernel
                        was .boot'ed last (or the default kernel if we
                        haven't .boot'ed in this job). If it's None,
                        we'll boot into the default kernel. If it's
                        something else, we'll boot into that.
                kernel_args - Extra kernel arguments to add to the boot
                        entry for the selected label.
                wait - Should we wait to see if the machine comes back up.
                fastsync - Don't wait for the sync to complete, just start
                        one and move on. This is for cases where rebooting
                        promptly is more important than data integrity
                        and/or the machine may have disks that cause sync
                        to never return.
                reboot_cmd - Reboot command to execute.
                **dargs - Forwarded to reboot_setup() and
                        wait_for_restart().
        """
        if self.job:
            if label is self.LAST_BOOT_TAG:
                label = self.job.last_boot_tag
            else:
                self.job.last_boot_tag = label
        elif label is self.LAST_BOOT_TAG:
            # Without a job there is no recorded "last boot" label; fall
            # back to the default kernel instead of handing the sentinel
            # object to reboot_setup() and the bootloader.
            label = None

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            if not label:
                label = self.bootloader.get_default_title()
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        if not reboot_cmd:
            # Escalating fallbacks: each step only matters if the host is
            # still running after the previous one.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def _do_reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, _do_reboot)
        else:
            _do_reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend.
                suspend_cmd - suspend command to execute.

        Raises:
                error.AutoservSuspendError if the suspend command fails,
                the host does not answer pings afterwards, or the whole
                operation took less than timeout seconds.
        """
        # define a function for the suspend and run it in a group
        def _do_suspend():
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once we are close to the deadline.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, _do_suspend)
        lasted = time.time() - start_time
        if lasted < timeout:
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot follow-up, then let the job's profilers
        handle the reboot when a job is attached.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        when close() is called on the Host object that was used to
        obtain it.
        """
        # NOTE(review): parent is interpolated into the shell command
        # unquoted; left as-is in case callers rely on shell expansion.
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Return the keyvals stored for this host under the job's
        host_keyvals directory, or None when no job is attached.
        """
        if not self.job:
            return None
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return None
        return keyvals.get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return []
        all_labels = keyvals.get('labels', '')
        if not all_labels:
            return []
        # Labels are stored comma-separated and URL-quoted.
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # Tolerate paths that were never tracked (or were already deleted)
        # rather than raising ValueError after the remote removal ran.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first field of /proc/uptime as a string.

        Raises error.AutoservHostError if the host does not appear
        to be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running (or if
        no waitup processes are configured), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False