blob: 7659a3f27e81acc8e73f5b3bb413f675fbf5f422 [file] [log] [blame]
"""This class defines the Remote host class, mixing in the SiteHost class
if it is available."""
3
mblighf2c33762008-10-18 14:42:34 +00004import os, time
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel used as the default 'label' of reboot(); it means "boot into
    # whatever label was booted last in this job".
    LAST_BOOT_TAG = object()

    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the per-host state.

        Args:
                hostname - name of the remote host; stored as self.hostname
                        and used by ping() and in log messages.
                autodir - autotest install directory on the host, if it
                        is already known.
                args, dargs - passed through to the parent _initialize.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # remote directories handed out by get_tmp_dir(), removed in close()
        self.tmp_dirs = []


    def close(self):
        """
        Close the host: run the parent close, stop the loggers and remove
        every temporary directory handed out by get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs only exists once _initialize has run
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # best-effort cleanup: a failed removal must not
                    # prevent the remaining directories from being removed
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        pass


    def get_autodir(self):
        """
        Return the autotest install directory recorded for this host
        (None if it has not been set).
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Trigger an immediate reboot by writing 'b' to /proc/sysrq-trigger
        on the remote host. The command is backgrounded on the host.
        """
        self.run('echo b > /proc/sysrq-trigger &')


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                label - The label we should boot into. If it's
                        LAST_BOOT_TAG, we'll boot into whichever kernel
                        was .boot'ed last (or the default kernel if we
                        haven't .boot'ed in this job, or if there is no
                        job). If it's None, we'll boot into the default
                        kernel. If it's something else, we'll boot into
                        that.
                kernel_args - Extra kernel arguments to add to the entry
                        being booted; None or empty to add nothing.
                wait - Should we wait to see if the machine comes back up.
                dargs - Passed through to reboot_setup and, when wait is
                        set, to reboot_followup.

        Raises:
                error.AutoservRunError if the reboot command itself fails.
        """
        if label is self.LAST_BOOT_TAG:
            if self.job:
                label = self.job.last_boot_tag
            else:
                # Without a job there is no record of a previous boot.
                # Fall back to the default kernel so that the (truthy)
                # sentinel object never reaches the bootloader.
                label = None
        elif self.job:
            self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            self.bootloader.install_boottool()
            if not label:
                # kernel_args without a label: apply them to the entry
                # the bootloader currently treats as default
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        # define a function for the reboot and run it in a group
        print("Reboot: initiating reboot")
        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                # delay and detach the reboot so that the command
                # returns before the host goes down
                self.run('(sleep 5; reboot) '
                         '</dev/null >/dev/null 2>&1 &')
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout)
                self.reboot_followup(**dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_reboot(reboot)
        else:
            reboot()


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def reboot_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout)
        self.log_reboot(reboot_func)


    def cleanup(self):
        """
        Run the parent cleanup, then reboot the host with the default
        reboot() arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        when close() is called on the Host object that was used to
        obtain it.

        Args:
                parent - directory on the host under which the temporary
                        directory is created.
        """
        template = os.path.join(parent, 'autoserv-XXXXXX')
        # quote the template so that a parent containing spaces or shell
        # metacharacters still yields a single mktemp argument
        command = 'mktemp -d "%s"' % utils.sh_escape(template)
        dir_name = self.run(command).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def ping(self):
        """
        Ping the remote system, and return whether it's available.

        Runs /usr/bin/fping through utils.system and returns True when
        its exit status is 0.
        """
        fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
        rc = utils.system(fpingcmd, ignore_status=True)
        return (rc == 0)


    def check_uptime(self):
        """
        Check that the host is pingable and read its uptime.

        Returns the first field of /proc/uptime on the host, as a string.
        No comparison against earlier readings is done here.

        Raises:
                error.AutoservHostError if the host does not answer ping.
        """
        if not self.ping():
            raise error.AutoservHostError('Client is not pingable')
        # NOTE(review): the positional 30 is presumably the run timeout in
        # seconds - confirm against the run() signature
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def get_crashinfo(self, test_start_time):
        """
        Collect crash information from the host after a failed test.

        Runs the parent get_crashinfo, waits up to four hours for the
        host to come back up, then copies a fixed set of log files and
        the dmesg output into a 'crashinfo.<hostname>' directory under
        the job result directory (or the current directory when there is
        no job). A failure to collect one item is printed and does not
        stop collection of the others.

        Args:
                test_start_time - passed through to the parent
                        get_crashinfo.
        """
        print("Collecting crash information...")
        super(RemoteHost, self).get_crashinfo(test_start_time)

        # wait for four hours, to see if the machine comes back up
        current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
        print("Waiting four hours for %s to come up (%s)" % (self.hostname,
                                                             current_time))
        if not self.wait_up(timeout=4*60*60):
            print("%s down, unable to collect crash info" % self.hostname)
            return
        print("%s is back up, collecting crash info" % self.hostname)

        # find a directory to put the crashinfo into
        if self.job:
            infodir = self.job.resultdir
        else:
            infodir = os.path.abspath(os.getcwd())
        infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
        if not os.path.exists(infodir):
            os.mkdir(infodir)

        # collect various log files
        log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
        for log in log_files:
            print("Collecting %s..." % log)
            try:
                self.get_file(log, infodir)
            except Exception as e:
                print("crashinfo collection of %s failed with:\n%s"
                      % (log, e))

        # collect dmesg
        print("Collecting dmesg...")
        try:
            result = self.run("dmesg").stdout
            # the context manager closes the file even if the write fails
            with open(os.path.join(infodir, "dmesg"), "w") as dmesg_file:
                dmesg_file.write(result)
        except Exception as e:
            print("crashinfo collection of dmesg failed with:\n%s" % e)


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if
        no waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # fall back to a plain 'ps' where 'ps -e' is unavailable
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False