blob: 1cde00e986f90728c3e4348de81c071fccf976b7 [file] [log] [blame]
"""This module defines the Remote host class, mixing in the SiteHost class
if it is available."""
3
mblighf2c33762008-10-18 14:42:34 +00004import os, time
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
jadmanski1c5e3a12008-08-15 23:08:20 +000010class RemoteHost(base_classes.Host):
jadmanskid60321a2008-10-28 20:32:05 +000011 """
12 This class represents a remote machine on which you can run
jadmanski0afbb632008-06-06 21:10:57 +000013 programs.
mbligh321b1f52008-04-09 16:23:43 +000014
jadmanski0afbb632008-06-06 21:10:57 +000015 It may be accessed through a network, a serial line, ...
16 It is not the machine autoserv is running on.
mbligh321b1f52008-04-09 16:23:43 +000017
jadmanski0afbb632008-06-06 21:10:57 +000018 Implementation details:
19 This is an abstract class, leaf subclasses must implement the methods
20 listed here and in parent classes which have no implementation. They
21 may reimplement methods which already have an implementation. You
22 must not instantiate this class but should instantiate one of those
jadmanskid60321a2008-10-28 20:32:05 +000023 leaf subclasses.
24 """
mbligh321b1f52008-04-09 16:23:43 +000025
mblighf2c33762008-10-18 14:42:34 +000026 DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
27 LAST_BOOT_TAG = object()
28
jadmanskif6562912008-10-21 17:59:01 +000029 def _initialize(self, hostname, autodir=None, *args, **dargs):
30 super(RemoteHost, self)._initialize(*args, **dargs)
mbligh321b1f52008-04-09 16:23:43 +000031
jadmanski1c5e3a12008-08-15 23:08:20 +000032 self.hostname = hostname
mblighf2c33762008-10-18 14:42:34 +000033 self.autodir = autodir
34 self.tmp_dirs = []
jadmanskia2db9412008-08-22 21:47:24 +000035
36
37 def __del__(self):
mblighf2c33762008-10-18 14:42:34 +000038 self.stop_loggers()
39
40 if hasattr(self, 'tmp_dirs'):
41 for dir in self.tmp_dirs:
42 try:
43 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
44 except error.AutoservRunError:
45 pass
46
47
jadmanskid60321a2008-10-28 20:32:05 +000048 def job_start(self):
49 """
50 Abstract method, called the first time a remote host object
51 is created for a specific host after a job starts.
52
53 This method depends on the create_host factory being used to
54 construct your host object. If you directly construct host objects
55 you will need to call this method yourself (and enforce the
56 single-call rule).
57 """
58 pass
59
60
mblighf2c33762008-10-18 14:42:34 +000061 def get_autodir(self):
62 return self.autodir
63
64
65 def set_autodir(self, autodir):
jadmanskid60321a2008-10-28 20:32:05 +000066 """
mblighf2c33762008-10-18 14:42:34 +000067 This method is called to make the host object aware of the
68 where autotest is installed. Called in server/autotest.py
69 after a successful install
jadmanskid60321a2008-10-28 20:32:05 +000070 """
mblighf2c33762008-10-18 14:42:34 +000071 self.autodir = autodir
72
73
74 def sysrq_reboot(self):
75 self.run('echo b > /proc/sysrq-trigger &')
76
77
78 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
79 kernel_args=None, wait=True, **dargs):
80 """
81 Reboot the remote host.
82
83 Args:
84 timeout - How long to wait for the reboot.
85 label - The label we should boot into. If None, we will
86 boot into the default kernel. If it's LAST_BOOT_TAG,
87 we'll boot into whichever kernel was .boot'ed last
88 (or the default kernel if we haven't .boot'ed in this
89 job). If it's None, we'll boot into the default kernel.
90 If it's something else, we'll boot into that.
91 wait - Should we wait to see if the machine comes back up.
92 """
93 if self.job:
94 if label == self.LAST_BOOT_TAG:
95 label = self.job.last_boot_tag
96 else:
97 self.job.last_boot_tag = label
98
99 self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
100
101 if label or kernel_args:
102 self.bootloader.install_boottool()
103 if not label:
104 default = int(self.bootloader.get_default())
105 label = self.bootloader.get_titles()[default]
106 self.bootloader.boot_once(label)
107 if kernel_args:
108 self.bootloader.add_args(label, kernel_args)
109
110 # define a function for the reboot and run it in a group
111 print "Reboot: initiating reboot"
112 def reboot():
113 self.record("GOOD", None, "reboot.start")
114 try:
115 self.run('(sleep 5; reboot) '
116 '</dev/null >/dev/null 2>&1 &')
117 except error.AutoservRunError:
118 self.record("ABORT", None, "reboot.start",
119 "reboot command failed")
120 raise
121 if wait:
122 self.wait_for_restart(timeout)
123 self.reboot_followup(**dargs)
124
125 # if this is a full reboot-and-wait, run the reboot inside a group
126 if wait:
127 self.log_reboot(reboot)
128 else:
129 reboot()
130
131
132 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
jadmanskid60321a2008-10-28 20:32:05 +0000133 """
134 Wait for the host to come back from a reboot. This wraps the
135 generic wait_for_restart implementation in a reboot group.
136 """
mblighf2c33762008-10-18 14:42:34 +0000137 def reboot_func():
138 super(RemoteHost, self).wait_for_restart(timeout=timeout)
139 self.log_reboot(reboot_func)
140
141
mbligh1264b512008-11-05 22:21:49 +0000142 def cleanup(self):
143 super(RemoteHost, self).cleanup()
144 self.reboot()
145
146
mblighe48bcfb2008-11-11 17:09:44 +0000147 def get_tmp_dir(self, parent='/tmp'):
mblighf2c33762008-10-18 14:42:34 +0000148 """
149 Return the pathname of a directory on the host suitable
150 for temporary file storage.
151
152 The directory and its content will be deleted automatically
153 on the destruction of the Host object that was used to obtain
154 it.
155 """
mblighe48bcfb2008-11-11 17:09:44 +0000156 template = os.path.join(parent, 'autoserv-XXXXXX')
157 dir_name= self.run("mktemp -d %s" % template).stdout.rstrip()
mblighf2c33762008-10-18 14:42:34 +0000158 self.tmp_dirs.append(dir_name)
159 return dir_name
160
161
162 def ping(self):
163 """
164 Ping the remote system, and return whether it's available
165 """
166 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
167 rc = utils.system(fpingcmd, ignore_status = 1)
168 return (rc == 0)
169
170
171 def check_uptime(self):
172 """
173 Check that uptime is available and monotonically increasing.
174 """
175 if not self.ping():
176 raise error.AutoservHostError('Client is not pingable')
177 result = self.run("/bin/cat /proc/uptime", 30)
178 return result.stdout.strip().split()[0]
179
180
181 def get_crashinfo(self, test_start_time):
182 print "Collecting crash information..."
183 super(RemoteHost, self).get_crashinfo(test_start_time)
184
185 # wait for four hours, to see if the machine comes back up
186 current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
187 print "Waiting four hours for %s to come up (%s)" % (self.hostname,
188 current_time)
189 if not self.wait_up(timeout=4*60*60):
190 print "%s down, unable to collect crash info" % self.hostname
191 return
192 else:
193 print "%s is back up, collecting crash info" % self.hostname
194
195 # find a directory to put the crashinfo into
196 if self.job:
197 infodir = self.job.resultdir
198 else:
199 infodir = os.path.abspath(os.getcwd())
200 infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
201 if not os.path.exists(infodir):
202 os.mkdir(infodir)
203
204 # collect various log files
205 log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
206 for log in log_files:
207 print "Collecting %s..." % log
208 try:
209 self.get_file(log, infodir)
210 except Exception, e:
211 print "crashinfo collection of %s failed with:\n%s" % (log, e)
212
213 # collect dmesg
214 print "Collecting dmesg..."
215 try:
216 result = self.run("dmesg").stdout
217 file(os.path.join(infodir, "dmesg"), "w").write(result)
218 except Exception, e:
219 print "crashinfo collection of dmesg failed with:\n%s" % e
220
221
jadmanskica7da372008-10-21 16:26:52 +0000222 def are_wait_up_processes_up(self):
mblighf2c33762008-10-18 14:42:34 +0000223 """
224 Checks if any HOSTS waitup processes are running yet on the
225 remote host.
226
227 Returns True if any the waitup processes are running, False
228 otherwise.
229 """
230 processes = self.get_wait_up_processes()
231 if len(processes) == 0:
232 return True # wait up processes aren't being used
233 for procname in processes:
234 exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
235 ignore_status=True).exit_status
236 if exit_status == 0:
237 return True
238 return False