blob: 81a4d0f83f4188d29186c376b8e2849e805f2e9e [file] [log] [blame]
Fang Deng0ca40e22013-08-27 17:47:44 -07001"""This class defines the Remote host class."""
mbligh321b1f52008-04-09 16:23:43 +00002
Andrey Ulanovad472902016-01-11 17:31:18 -08003import os, logging, urllib, time
mblighf2c33762008-10-18 14:42:34 +00004from autotest_lib.client.common_lib import error
jadmanski96b78072009-05-21 22:21:04 +00005from autotest_lib.server import utils
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07006from autotest_lib.server.hosts import base_classes
mbligh321b1f52008-04-09 16:23:43 +00007
8
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time to wait for the host to go down after halt(), in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as label_function(host) by get_labels().
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Remote path where job_start() stores a copy of /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Initialize the host state kept by this class.

        @param hostname Name of the remote host.
        @param autodir Location where autotest is installed on the host,
                or None if not known yet (see set_autodir()).
        @param args Positional arguments forwarded to the parent class.
        @param dargs Keyword arguments forwarded to the parent class.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote temporary directories handed out by get_tmp_dir().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host: run the parent close(), stop the loggers and
        make a best-effort attempt to delete every temporary directory
        recorded in self.tmp_dirs.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs only exists once _initialize() has run, so tolerate
        # its absence instead of failing during close().
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Cleanup is best effort; a failed removal must not
                    # prevent the remaining directories from being removed.
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest install location recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.

        @param autodir The autotest install location on the host.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Reboot the host by writing 'b' to /proc/sysrq-trigger, using a
        background command.
        """
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                       If this is set to True, errors from reboot_cmd are
                       ignored (as long as the boot id could be read).
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
                dargs - Extra keyword arguments, passed to reboot_setup()
                        and, when waiting, to wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            # Escalate through progressively more forceful commands.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            """Issue the reboot command and optionally wait for restart."""
            self.record("GOOD", None, "reboot.start")
            current_boot_id = None
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                # If wait is set, ignore the error here, and rely on the
                # wait_for_restart() for stability, instead.
                # reboot_cmd sometimes causes an error even if reboot is
                # successfully in progress. This is difficult to be avoided,
                # because we have no much control on remote machine after
                # "reboot" starts.
                if not wait or current_boot_id is None:
                    # TODO(b/37652392): Revisit no-wait case, later.
                    self.record("ABORT", None, "reboot.start",
                                "reboot command failed")
                    raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd,
                allow_early_resume=False):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend in integer seconds.
                suspend_cmd - suspend command to execute.
                allow_early_resume - Boolean that indicate whether resume
                                     before |timeout| is ok.
        Raises:
                error.AutoservSuspendError - If the suspend command fails,
                                             if the device does not respond
                                             again after the polling window,
                                             or if |allow_early_resume| is
                                             False and the device resumes
                                             before |timeout|.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            """Issue the suspend command and poll until the host is back."""
            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once we are close to the deadline.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        logging.info("Device resumed after %d secs", lasted)
        if (lasted < timeout and not allow_early_resume):
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup() and then, if a job is attached
        to this host, let the job's profilers handle the reboot.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        @param timeout Passed through to the parent wait_for_restart().
        @param dargs Extra keyword arguments for the parent implementation.
        """
        def op_func():
            """Call the parent implementation with the captured arguments."""
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup() and then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.

        @param parent Remote directory under which the temporary
                directory is created; it is created first if missing.
        """
        # NOTE(review): `parent` is interpolated into the shell command
        # unquoted, so paths containing whitespace will not work here.
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read this host's keyvals from the job results directory.

        @returns The result of utils.read_keyval() for
                <resultdir>/host_keyvals/<hostname>, or None when no job
                is attached to this host.
        """
        if not self.job:
            return None
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return None
        return keyvals.get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.
        """
        keyvals = self._read_host_keyvals()
        if keyvals is None:
            return []
        all_labels = keyvals.get('labels', '')
        if not all_labels:
            return []
        # Labels are stored comma separated and URL-quoted.
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # Only forget directories we are actually tracking; list.remove()
        # would raise ValueError for a directory we never recorded.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @returns The first field of /proc/uptime, as a string.
        @raises error.AutoservHostError if the host does not appear to
                be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running (or if
        no waitup processes are configured at all), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of strings;
        falsy results and functions that raise are skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception:
                logging.exception('Label function %s failed; ignoring it.',
                                  label_function.__name__)
                continue
            if not label:
                continue
            if isinstance(label, str):
                labels.append(label)
            elif isinstance(label, list):
                labels.extend(label)
        return labels