blob: f1d0be4d3f2a318d9e1e14b4b10f7eceef630957 [file] [log] [blame]
"""This module defines the RemoteHost class, the base class for hosts that
are not the machine autoserv is running on.

RemoteHost derives from base_classes.Host; no site-specific mix-in is applied
in this module itself."""
3
import os
import sys
import time

from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Unique sentinel meaning "boot whatever label was booted last"; it is
    # distinct from None, which means "boot the default kernel".
    LAST_BOOT_TAG = object()

    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the host name and autotest directory for this host.

        Args:
            hostname - Name of the remote machine.
            autodir - Where autotest is installed on the host, if known.
            Remaining positional and keyword arguments are forwarded to
            the parent class's _initialize.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        self.tmp_dirs = []


    def __del__(self):
        """
        Stop the loggers and remove every temporary directory handed out
        by get_tmp_dir.
        """
        self.stop_loggers()

        # tmp_dirs is missing when _initialize never ran to completion.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Best-effort cleanup: a failed removal must not abort
                    # the removal of the remaining directories.
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        pass


    def get_autodir(self):
        """
        Return the autotest directory recorded for this host (None if it
        was never set).
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Reboot the host by writing 'b' to /proc/sysrq-trigger. The command
        is backgrounded on the remote side.
        """
        self.run('echo b > /proc/sysrq-trigger &')


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, **dargs):
        """
        Reboot the remote host.

        Args:
            timeout - How long to wait for the reboot.
            label - The label we should boot into. If it's LAST_BOOT_TAG,
                    we'll boot into whichever kernel was .boot'ed last
                    (or the default kernel if we haven't .boot'ed in this
                    job, or if there is no job). If it's None, we'll boot
                    into the default kernel. If it's something else, we'll
                    boot into that.
            kernel_args - Extra arguments added to the bootloader entry
                    that is booted.
            wait - Should we wait to see if the machine comes back up.
            Remaining keyword arguments are forwarded to reboot_setup and,
            when wait is true, to reboot_followup.
        """
        if label is self.LAST_BOOT_TAG:
            # Resolve the sentinel to a real label. Without a job there is
            # no record of the last boot, so fall back to the default
            # kernel instead of handing the sentinel to the bootloader.
            if self.job:
                label = self.job.last_boot_tag
            else:
                label = None
        elif self.job:
            self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            self.bootloader.install_boottool()
            if not label:
                # kernel_args without a label apply to the default entry.
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        # define a function for the reboot and run it in a group
        print("Reboot: initiating reboot")
        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                # Delay and background the reboot so the command returns
                # before the machine goes down.
                self.run('(sleep 5; reboot) '
                         '</dev/null >/dev/null 2>&1 &')
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout)
                self.reboot_followup(**dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_reboot(reboot)
        else:
            reboot()


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def reboot_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout)
        self.log_reboot(reboot_func)


    def cleanup(self):
        """
        Run the parent class's cleanup, then reboot the host with the
        default reboot arguments.
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        on the destruction of the Host object that was used to obtain
        it.
        """
        dir_name = self.run("mktemp -d /tmp/autoserv-XXXXXX").stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def ping(self):
        """
        Ping the remote system, and return whether it's available.

        Runs /usr/bin/fping through utils.system and returns True when
        its exit status is 0.
        """
        fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
        rc = utils.system(fpingcmd, ignore_status=1)
        return rc == 0


    def check_uptime(self):
        """
        Check that the host is pingable and return its uptime.

        Returns the first field of /proc/uptime, as a string.
        Raises error.AutoservHostError if the host does not answer a ping.
        """
        if not self.ping():
            raise error.AutoservHostError('Client is not pingable')
        # NOTE(review): the positional 30 is presumably run()'s timeout in
        # seconds -- confirm against the run() signature.
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def get_crashinfo(self, test_start_time):
        """
        Collect crash information from the host after a test.

        After the parent class's collection, waits up to four hours for
        the host to come back up, then copies a fixed set of log files and
        the output of dmesg into a crashinfo.<hostname> directory under
        the job's result directory (or the current directory when there is
        no job). A failure to collect one item is printed and does not
        stop the collection of the others.

        Args:
            test_start_time - Passed through to the parent class's
                    get_crashinfo.
        """
        print("Collecting crash information...")
        super(RemoteHost, self).get_crashinfo(test_start_time)

        # wait for four hours, to see if the machine comes back up
        current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
        print("Waiting four hours for %s to come up (%s)" % (self.hostname,
                                                             current_time))
        if not self.wait_up(timeout=4*60*60):
            print("%s down, unable to collect crash info" % self.hostname)
            return
        print("%s is back up, collecting crash info" % self.hostname)

        # find a directory to put the crashinfo into
        if self.job:
            infodir = self.job.resultdir
        else:
            infodir = os.path.abspath(os.getcwd())
        infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
        if not os.path.exists(infodir):
            os.mkdir(infodir)

        # collect various log files
        log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
        for log in log_files:
            print("Collecting %s..." % log)
            try:
                self.get_file(log, infodir)
            except Exception:
                # sys.exc_info() fetches the active exception without
                # version-specific "except ..., e" / "except ... as e"
                # syntax.
                print("crashinfo collection of %s failed with:\n%s"
                      % (log, sys.exc_info()[1]))

        # collect dmesg
        print("Collecting dmesg...")
        try:
            result = self.run("dmesg").stdout
            dmesg_file = open(os.path.join(infodir, "dmesg"), "w")
            try:
                dmesg_file.write(result)
            finally:
                # Close explicitly so the handle is released (and the data
                # flushed) even if the write fails.
                dmesg_file.close()
        except Exception:
            print("crashinfo collection of dmesg failed with:\n%s"
                  % sys.exc_info()[1])


    def are_wait_up_processes_up(self):
        """
        Checks if any of the host's wait-up processes are running yet on
        the remote host.

        Returns True if any of the wait-up processes are running, or if
        no wait-up processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to plain "ps" where "ps -e" is not supported; grep
            # exits with status 0 only when the process name is listed.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False