blob: d8333ef65c34a2cdb0188a5bbf1f4b055e6eef35 [file] [log] [blame]
"""This module defines the RemoteHost class, mixing in the SiteHost class
if it is available."""
3
import os, sys, time
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel used as the default `label` of reboot(): it means "boot
    # whatever label was booted last", as opposed to None ("default kernel").
    LAST_BOOT_TAG = object()


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Delegate to the parent initializer, then record the host identity.

        Args:
                hostname - Name of the remote machine.
                autodir - Directory where autotest is installed on the
                        host, if already known.
                *args, **dargs - Passed through to the parent _initialize.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote directories handed out by get_tmp_dir(); removed in close().
        self.tmp_dirs = []


    def close(self):
        """
        Run the parent close, stop the loggers and remove every temporary
        directory that get_tmp_dir() created on the host.

        Removal is best effort: a failing rm command is ignored.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only assigned in _initialize; presumably close() can
        # run on an object whose initialization never got that far.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        pass


    def get_autodir(self):
        """
        Return the autotest install directory recorded for this host
        (None if it has not been set).
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Trigger a reboot by writing 'b' to /proc/sysrq-trigger on the host.
        The command is backgrounded so run() does not wait on it.
        """
        self.run('echo b > /proc/sysrq-trigger &')


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                label - The label we should boot into. If it's
                        LAST_BOOT_TAG, we'll boot into whichever kernel was
                        .boot'ed last (or the default kernel if we haven't
                        .boot'ed in this job, or if there is no job). If
                        it's None, we'll boot into the default kernel.
                        If it's something else, we'll boot into that.
                kernel_args - Extra arguments to add to the boot entry for
                        the chosen label, if any.
                wait - Should we wait to see if the machine comes back up.
                **dargs - Passed on to reboot_setup and, when waiting, to
                        wait_for_restart.

        Raises:
                error.AutoservRunError - if the reboot command itself fails
                        (an ABORT status is recorded first).
        """
        if self.job:
            if label is self.LAST_BOOT_TAG:
                label = self.job.last_boot_tag
            else:
                self.job.last_boot_tag = label
        elif label is self.LAST_BOOT_TAG:
            # Without a job there is no remembered label. The sentinel is a
            # truthy object, so it must not leak through to the bootloader
            # calls below; fall back to the default kernel instead.
            label = None

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            self.bootloader.install_boottool()
            if not label:
                # kernel_args without a label: apply them to the default
                # boot entry.
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        # define a function for the reboot and run it in a group
        print("Reboot: initiating reboot")
        # NOTE: the inner function keeps the name "reboot" in case
        # log_reboot derives anything from the callable's name.
        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                # Try several methods of rebooting in increasing harshness.
                self.run('('
                         ' sleep 5; reboot &'
                         ' sleep 60; reboot -f &'
                         ' sleep 10; reboot -nf &'
                         ' sleep 10; telinit 6 &'
                         ') </dev/null >/dev/null 2>&1 &')
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_reboot(reboot)
        else:
            reboot()


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot follow-up, then, if a job is attached, let
        its profilers handle the reboot of this host.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        Args:
                timeout - How long to wait for the host to come back.
                **dargs - Passed through to the parent wait_for_restart.
        """
        def reboot_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_reboot(reboot_func)


    def cleanup(self):
        """
        Run the parent cleanup and then reboot the host using the default
        reboot() arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.

        Args:
                parent - Remote directory under which the temporary
                        directory is created; it is created first if
                        missing. The path is quoted for the shell, so it is
                        taken literally (no shell expansion).
        """
        # Quote the paths the same way close() does, so a parent containing
        # spaces or shell metacharacters cannot break or alter the command.
        self.run('mkdir -p "%s"' % utils.sh_escape(parent))
        template = os.path.join(parent, 'autoserv-XXXXXX')
        mktemp_cmd = 'mktemp -d "%s"' % utils.sh_escape(template)
        dir_name = self.run(mktemp_cmd).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def ping(self):
        """
        Ping the remote system, and return whether it's available
        """
        fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
        # A non-zero exit status is an expected outcome here, not an error.
        rc = utils.system(fpingcmd, ignore_status=True)
        return (rc == 0)


    def check_uptime(self):
        """
        Return the host's uptime as reported by /proc/uptime.

        Returns:
                The first field of /proc/uptime, as a string.

        Raises:
                error.AutoservHostError - if the host does not answer ping.
        """
        if not self.ping():
            raise error.AutoservHostError('Client is not pingable')
        # Second positional argument is passed through to run() unchanged
        # (30, presumably the command timeout in seconds).
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def get_crashinfo(self, test_start_time):
        """
        Collect crash information from the host after a failure.

        Runs the parent collection, waits up to four hours for the host to
        come back up, then copies a fixed set of log files and the dmesg
        output into a "crashinfo.<hostname>" directory (under the job's
        result directory if there is a job, else under the current working
        directory). Individual collection failures are printed and skipped.

        Args:
                test_start_time - Passed through to the parent
                        get_crashinfo.
        """
        print("Collecting crash information...")
        super(RemoteHost, self).get_crashinfo(test_start_time)

        # wait for four hours, to see if the machine comes back up
        current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
        print("Waiting four hours for %s to come up (%s)"
              % (self.hostname, current_time))
        if not self.wait_up(timeout=4*60*60):
            print("%s down, unable to collect crash info" % self.hostname)
            return
        print("%s is back up, collecting crash info" % self.hostname)

        # find a directory to put the crashinfo into
        if self.job:
            infodir = self.job.resultdir
        else:
            infodir = os.path.abspath(os.getcwd())
        infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
        if not os.path.exists(infodir):
            os.mkdir(infodir)

        # collect various log files
        log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
        for log in log_files:
            print("Collecting %s..." % log)
            try:
                self.get_file(log, infodir)
            except Exception:
                # Best effort: report and move on to the next file.
                # sys.exc_info() is used instead of binding the exception
                # in the except clause so the syntax is valid on both old
                # Python 2 releases and Python 3.
                print("crashinfo collection of %s failed with:\n%s"
                      % (log, sys.exc_info()[1]))

        # collect dmesg
        print("Collecting dmesg (saved to crashinfo/dmesg)...")
        # Tee the command's stdout to /dev/null so the dmesg dump is not
        # echoed; it is only written to the file below.
        devnull = open("/dev/null", "w")
        try:
            try:
                result = self.run("dmesg", stdout_tee=devnull).stdout
                dmesg_file = open(os.path.join(infodir, "dmesg"), "w")
                try:
                    dmesg_file.write(result)
                finally:
                    # Close explicitly rather than relying on garbage
                    # collection to flush and release the handle.
                    dmesg_file.close()
            except Exception:
                print("crashinfo collection of dmesg failed with:\n%s"
                      % sys.exc_info()[1])
        finally:
            devnull.close()


    def are_wait_up_processes_up(self):
        """
        Checks if any of the host's wait-up processes are running yet on
        the remote host.

        Returns True if any of the wait-up processes are running (or if no
        wait-up processes are configured at all), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to plain "ps" where "ps -e" is not supported; grep
            # exits 0 only when the process name appears in the listing.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False