blob: b98fad6c376c3de71f6a09726ef235ba35c5b878 [file] [log] [blame]
"""This module defines the RemoteHost class, mixing in the SiteHost class
if it is available."""
3
mblighf2c33762008-10-18 14:42:34 +00004import os, time
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    # Default reboot timeout, taken unchanged from the base Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Unique sentinel used as the default 'label' argument of reboot();
    # it stands for "whichever label was booted last in this job".
    LAST_BOOT_TAG = object()
28
jadmanskif6562912008-10-21 17:59:01 +000029 def _initialize(self, hostname, autodir=None, *args, **dargs):
30 super(RemoteHost, self)._initialize(*args, **dargs)
mbligh321b1f52008-04-09 16:23:43 +000031
jadmanski1c5e3a12008-08-15 23:08:20 +000032 self.hostname = hostname
mblighf2c33762008-10-18 14:42:34 +000033 self.autodir = autodir
34 self.tmp_dirs = []
jadmanskia2db9412008-08-22 21:47:24 +000035
36
jadmanski53aaf382008-11-17 16:22:31 +000037 def close(self):
38 super(RemoteHost, self).close()
mblighf2c33762008-10-18 14:42:34 +000039 self.stop_loggers()
40
41 if hasattr(self, 'tmp_dirs'):
42 for dir in self.tmp_dirs:
43 try:
44 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
45 except error.AutoservRunError:
46 pass
47
48
jadmanskid60321a2008-10-28 20:32:05 +000049 def job_start(self):
50 """
51 Abstract method, called the first time a remote host object
52 is created for a specific host after a job starts.
53
54 This method depends on the create_host factory being used to
55 construct your host object. If you directly construct host objects
56 you will need to call this method yourself (and enforce the
57 single-call rule).
58 """
59 pass
60
61
mblighf2c33762008-10-18 14:42:34 +000062 def get_autodir(self):
63 return self.autodir
64
65
66 def set_autodir(self, autodir):
jadmanskid60321a2008-10-28 20:32:05 +000067 """
mblighf2c33762008-10-18 14:42:34 +000068 This method is called to make the host object aware of the
69 where autotest is installed. Called in server/autotest.py
70 after a successful install
jadmanskid60321a2008-10-28 20:32:05 +000071 """
mblighf2c33762008-10-18 14:42:34 +000072 self.autodir = autodir
73
74
75 def sysrq_reboot(self):
76 self.run('echo b > /proc/sysrq-trigger &')
77
78
79 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
80 kernel_args=None, wait=True, **dargs):
81 """
82 Reboot the remote host.
83
84 Args:
85 timeout - How long to wait for the reboot.
86 label - The label we should boot into. If None, we will
87 boot into the default kernel. If it's LAST_BOOT_TAG,
88 we'll boot into whichever kernel was .boot'ed last
89 (or the default kernel if we haven't .boot'ed in this
90 job). If it's None, we'll boot into the default kernel.
91 If it's something else, we'll boot into that.
92 wait - Should we wait to see if the machine comes back up.
93 """
94 if self.job:
95 if label == self.LAST_BOOT_TAG:
96 label = self.job.last_boot_tag
97 else:
98 self.job.last_boot_tag = label
99
100 self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
101
102 if label or kernel_args:
103 self.bootloader.install_boottool()
104 if not label:
105 default = int(self.bootloader.get_default())
106 label = self.bootloader.get_titles()[default]
107 self.bootloader.boot_once(label)
108 if kernel_args:
109 self.bootloader.add_args(label, kernel_args)
110
111 # define a function for the reboot and run it in a group
112 print "Reboot: initiating reboot"
113 def reboot():
114 self.record("GOOD", None, "reboot.start")
115 try:
jadmanskid544a352009-01-14 23:36:28 +0000116 # sync before starting the reboot, so that a long sync during
117 # shutdown isn't timed out by wait_down's short timeout
118 self.run('sync; sync', timeout=timeout, ignore_status=True)
119
jadmanski0e1881e2009-01-14 23:33:12 +0000120 # Try several methods of rebooting in increasing harshness.
121 self.run('('
122 ' sleep 5; reboot &'
123 ' sleep 60; reboot -f &'
124 ' sleep 10; reboot -nf &'
125 ' sleep 10; telinit 6 &'
126 ') </dev/null >/dev/null 2>&1 &')
mblighf2c33762008-10-18 14:42:34 +0000127 except error.AutoservRunError:
128 self.record("ABORT", None, "reboot.start",
129 "reboot command failed")
130 raise
131 if wait:
jadmanskid778ae42009-01-07 15:07:36 +0000132 self.wait_for_restart(timeout, **dargs)
mblighf2c33762008-10-18 14:42:34 +0000133
134 # if this is a full reboot-and-wait, run the reboot inside a group
135 if wait:
136 self.log_reboot(reboot)
137 else:
138 reboot()
139
140
jadmanski4f909252008-12-01 20:47:10 +0000141 def reboot_followup(self, *args, **dargs):
142 super(RemoteHost, self).reboot_followup(*args, **dargs)
143 if self.job:
144 self.job.profilers.handle_reboot(self)
145
146
jadmanskid778ae42009-01-07 15:07:36 +0000147 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
jadmanskid60321a2008-10-28 20:32:05 +0000148 """
149 Wait for the host to come back from a reboot. This wraps the
150 generic wait_for_restart implementation in a reboot group.
151 """
mblighf2c33762008-10-18 14:42:34 +0000152 def reboot_func():
jadmanskid778ae42009-01-07 15:07:36 +0000153 super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
mblighf2c33762008-10-18 14:42:34 +0000154 self.log_reboot(reboot_func)
155
156
mbligh1264b512008-11-05 22:21:49 +0000157 def cleanup(self):
158 super(RemoteHost, self).cleanup()
159 self.reboot()
160
161
mblighe48bcfb2008-11-11 17:09:44 +0000162 def get_tmp_dir(self, parent='/tmp'):
mblighf2c33762008-10-18 14:42:34 +0000163 """
164 Return the pathname of a directory on the host suitable
165 for temporary file storage.
166
167 The directory and its content will be deleted automatically
168 on the destruction of the Host object that was used to obtain
169 it.
170 """
jadmanski9f7dd112008-11-17 16:40:05 +0000171 self.run("mkdir -p %s" % parent)
mblighe48bcfb2008-11-11 17:09:44 +0000172 template = os.path.join(parent, 'autoserv-XXXXXX')
jadmanski9f7dd112008-11-17 16:40:05 +0000173 dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
mblighf2c33762008-10-18 14:42:34 +0000174 self.tmp_dirs.append(dir_name)
175 return dir_name
176
177
178 def ping(self):
179 """
180 Ping the remote system, and return whether it's available
181 """
182 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
183 rc = utils.system(fpingcmd, ignore_status = 1)
184 return (rc == 0)
185
186
187 def check_uptime(self):
188 """
189 Check that uptime is available and monotonically increasing.
190 """
191 if not self.ping():
192 raise error.AutoservHostError('Client is not pingable')
193 result = self.run("/bin/cat /proc/uptime", 30)
194 return result.stdout.strip().split()[0]
195
196
197 def get_crashinfo(self, test_start_time):
198 print "Collecting crash information..."
199 super(RemoteHost, self).get_crashinfo(test_start_time)
200
201 # wait for four hours, to see if the machine comes back up
202 current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
203 print "Waiting four hours for %s to come up (%s)" % (self.hostname,
204 current_time)
205 if not self.wait_up(timeout=4*60*60):
206 print "%s down, unable to collect crash info" % self.hostname
207 return
208 else:
209 print "%s is back up, collecting crash info" % self.hostname
210
211 # find a directory to put the crashinfo into
212 if self.job:
213 infodir = self.job.resultdir
214 else:
215 infodir = os.path.abspath(os.getcwd())
216 infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
217 if not os.path.exists(infodir):
218 os.mkdir(infodir)
219
220 # collect various log files
221 log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
222 for log in log_files:
223 print "Collecting %s..." % log
224 try:
225 self.get_file(log, infodir)
mbligha2c940d2009-01-30 22:35:19 +0000226 except Exception:
227 print "Collection of %s failed. Non-fatal, continuing." % log
mblighf2c33762008-10-18 14:42:34 +0000228
229 # collect dmesg
mbligh78a013a2009-01-13 19:34:28 +0000230 print "Collecting dmesg (saved to crashinfo/dmesg)..."
231 devnull = open("/dev/null", "w")
mblighf2c33762008-10-18 14:42:34 +0000232 try:
mbligh78a013a2009-01-13 19:34:28 +0000233 try:
234 result = self.run("dmesg", stdout_tee=devnull).stdout
235 file(os.path.join(infodir, "dmesg"), "w").write(result)
236 except Exception, e:
237 print "crashinfo collection of dmesg failed with:\n%s" % e
238 finally:
239 devnull.close()
mblighf2c33762008-10-18 14:42:34 +0000240
241
jadmanskica7da372008-10-21 16:26:52 +0000242 def are_wait_up_processes_up(self):
mblighf2c33762008-10-18 14:42:34 +0000243 """
244 Checks if any HOSTS waitup processes are running yet on the
245 remote host.
246
247 Returns True if any the waitup processes are running, False
248 otherwise.
249 """
250 processes = self.get_wait_up_processes()
251 if len(processes) == 0:
252 return True # wait up processes aren't being used
253 for procname in processes:
254 exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
255 ignore_status=True).exit_status
256 if exit_status == 0:
257 return True
258 return False