"""This module defines the RemoteHost class, mixing in the SiteHost class
if it is available."""

import os
import sys
import time

from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes, bootloader


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Unique sentinel used as the default `label` of reboot(); it means
    # "boot whatever label was booted last in this job".
    LAST_BOOT_TAG = object()

    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the per-host state kept by this class.

        Args:
            hostname - Name of the remote machine; stored on the object.
            autodir - Where autotest is installed on the host, if already
                    known; stored on the object.
            *args, **dargs - Passed through to the parent _initialize.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # directories handed out by get_tmp_dir(); removed again in close()
        self.tmp_dirs = []


    def close(self):
        """
        Close the host: run the parent close, stop the loggers and remove
        every temporary directory handed out by get_tmp_dir().

        Removal is best-effort: a directory whose removal command fails
        with AutoservRunError is skipped silently.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs may be missing (presumably when _initialize never ran);
        # in that case there is nothing to clean up
        for tmp_dir in getattr(self, 'tmp_dirs', []):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError:
                pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        pass


    def get_autodir(self):
        """
        Return the autotest install location recorded for this host
        (None if none has been recorded).
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Ask the remote kernel to reboot through the magic SysRq interface
        by writing 'b' to /proc/sysrq-trigger (run in the background).
        """
        self.run('echo b > /proc/sysrq-trigger &')


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, **dargs):
        """
        Reboot the remote host.

        Args:
            timeout - How long to wait for the reboot.
            label - The label we should boot into. If it's LAST_BOOT_TAG,
                    we'll boot into whichever kernel was .boot'ed last
                    (or the default kernel if we haven't .boot'ed in this
                    job, or if there is no job). If it's None, we'll boot
                    into the default kernel. If it's something else,
                    we'll boot into that.
            kernel_args - Extra kernel arguments to add to the boot entry
                    for the label being booted, if any.
            wait - Should we wait to see if the machine comes back up.
            **dargs - Passed through to reboot_setup and reboot_followup.
        """
        if label is self.LAST_BOOT_TAG:
            if self.job:
                label = self.job.last_boot_tag
            else:
                # Without a job there is no record of a previous boot.
                # Fall back to the default kernel rather than letting the
                # (truthy) sentinel object reach the bootloader.
                label = None
        elif self.job:
            self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            self.bootloader.install_boottool()
            if not label:
                # kernel args were given without a label: attach them to
                # the bootloader's current default entry
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        # define a function for the reboot and run it in a group
        print("Reboot: initiating reboot")
        def do_reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                # delayed and detached from our stdio, so that this run()
                # call can return before the machine goes down
                self.run('(sleep 5; reboot) '
                         '</dev/null >/dev/null 2>&1 &')
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout)
                self.reboot_followup(**dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_reboot(do_reboot)
        else:
            do_reboot()


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup and then, when a job is attached,
        let the job's profilers handle the reboot of this host.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def reboot_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout)
        self.log_reboot(reboot_func)


    def cleanup(self):
        """
        Run the parent cleanup, then reboot the host with the default
        reboot() arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is created under `parent` (which is created first
        if needed) and is remembered so that close() removes it together
        with its content.
        """
        # NOTE(review): parent is interpolated into the shell command
        # unquoted, so paths with spaces or shell metacharacters will
        # misbehave; callers are assumed to pass plain paths.
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def ping(self):
        """
        Ping the remote system, and return whether it's available.

        Runs /usr/bin/fping against the hostname and returns True when
        the command exits with status 0.
        """
        fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
        rc = utils.system(fpingcmd, ignore_status=1)
        return (rc == 0)


    def check_uptime(self):
        """
        Return the uptime reported by the remote host.

        The value is the first field of /proc/uptime, returned as a
        string. No comparison against an earlier reading is made here.

        Raises:
            error.AutoservHostError - if the host does not answer ping().
        """
        if not self.ping():
            raise error.AutoservHostError('Client is not pingable')
        # second positional argument is presumably run()'s timeout in
        # seconds -- confirm against the run() implementation
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def get_crashinfo(self, test_start_time):
        """
        Collect crash information from the host after it has gone down.

        Runs the parent get_crashinfo, waits up to four hours for the
        host to come back, then copies a fixed set of log files and the
        output of dmesg into a "crashinfo.<hostname>" directory under the
        job's result directory (or the current directory when there is no
        job). Each individual collection step is best-effort: a failure
        is printed and the remaining steps still run.

        Args:
            test_start_time - Passed through to the parent get_crashinfo.
        """
        print("Collecting crash information...")
        super(RemoteHost, self).get_crashinfo(test_start_time)

        # wait for four hours, to see if the machine comes back up
        current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
        print("Waiting four hours for %s to come up (%s)" % (self.hostname,
                                                             current_time))
        if not self.wait_up(timeout=4*60*60):
            print("%s down, unable to collect crash info" % self.hostname)
            return
        print("%s is back up, collecting crash info" % self.hostname)

        # find a directory to put the crashinfo into
        if self.job:
            infodir = self.job.resultdir
        else:
            infodir = os.path.abspath(os.getcwd())
        infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
        if not os.path.exists(infodir):
            os.mkdir(infodir)

        # collect various log files
        log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
        for log in log_files:
            print("Collecting %s..." % log)
            try:
                self.get_file(log, infodir)
            except Exception:
                # sys.exc_info() instead of an "except ... as" binding so
                # the same source parses on every Python version
                e = sys.exc_info()[1]
                print("crashinfo collection of %s failed with:\n%s"
                      % (log, e))

        # collect dmesg
        print("Collecting dmesg...")
        try:
            result = self.run("dmesg").stdout
            dmesg_file = open(os.path.join(infodir, "dmesg"), "w")
            try:
                dmesg_file.write(result)
            finally:
                # close explicitly so the data is flushed and the
                # descriptor released even if the write fails
                dmesg_file.close()
        except Exception:
            e = sys.exc_info()[1]
            print("crashinfo collection of dmesg failed with:\n%s" % e)


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running (or if
        no waitup processes are configured at all), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # fall back to plain "ps" when "ps -e" fails; grep exits
            # with status 0 only when the name appears in the listing
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False