blob: 09475a80bef79e8f41b2c9847bf865bd2754664f [file] [log] [blame]
"""This module defines the RemoteHost class, mixing in the SiteHost class
if it is available."""
3
mblighf2c33762008-10-18 14:42:34 +00004import os, time
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
jadmanski1c5e3a12008-08-15 23:08:20 +000010class RemoteHost(base_classes.Host):
jadmanskid60321a2008-10-28 20:32:05 +000011 """
12 This class represents a remote machine on which you can run
jadmanski0afbb632008-06-06 21:10:57 +000013 programs.
mbligh321b1f52008-04-09 16:23:43 +000014
jadmanski0afbb632008-06-06 21:10:57 +000015 It may be accessed through a network, a serial line, ...
16 It is not the machine autoserv is running on.
mbligh321b1f52008-04-09 16:23:43 +000017
jadmanski0afbb632008-06-06 21:10:57 +000018 Implementation details:
19 This is an abstract class, leaf subclasses must implement the methods
20 listed here and in parent classes which have no implementation. They
21 may reimplement methods which already have an implementation. You
22 must not instantiate this class but should instantiate one of those
jadmanskid60321a2008-10-28 20:32:05 +000023 leaf subclasses.
24 """
mbligh321b1f52008-04-09 16:23:43 +000025
mblighf2c33762008-10-18 14:42:34 +000026 DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
27 LAST_BOOT_TAG = object()
28
def _initialize(self, hostname, autodir=None, *args, **dargs):
    """
    Set up the state that is specific to a remote host.

    Args:
        hostname - stored as self.hostname
        autodir - stored as self.autodir (defaults to None)
        *args, **dargs - handed unchanged to the parent _initialize
    """
    parent = super(RemoteHost, self)
    parent._initialize(*args, **dargs)

    # directories handed out by get_tmp_dir(); __del__ removes them
    self.tmp_dirs = []
    self.autodir = autodir
    self.hostname = hostname
jadmanskia2db9412008-08-22 21:47:24 +000035
36
def __del__(self):
    """
    Clean up on destruction: stop the loggers, then try to remove
    every directory listed in self.tmp_dirs from the host.
    """
    self.stop_loggers()

    # presumably tmp_dirs can be missing when _initialize never finished
    if not hasattr(self, 'tmp_dirs'):
        return

    for tmp_dir in self.tmp_dirs:
        try:
            self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
        except error.AutoservRunError:
            # a failed removal is deliberately ignored
            pass
46
47
def job_start(self):
    """
    Hook called the first time a remote host object is created for a
    specific host after a job starts. This implementation does nothing.

    The call is made by the create_host factory. Code that constructs
    host objects directly has to call this method itself, and has to
    make sure it is only called once.
    """
    return None
59
60
def get_autodir(self):
    """
    Return the value currently stored in self.autodir.
    """
    current_autodir = self.autodir
    return current_autodir
63
64
def set_autodir(self, autodir):
    """
    Tell the host object where autotest is installed.

    Called from server/autotest.py once an install has succeeded.

    Args:
        autodir - the value to store in self.autodir
    """
    setattr(self, 'autodir', autodir)
72
73
def sysrq_reboot(self):
    """
    Reboot the host by writing 'b' to /proc/sysrq-trigger. The command
    is run in the background on the host.
    """
    trigger_cmd = 'echo b > /proc/sysrq-trigger &'
    self.run(trigger_cmd)
76
77
def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
           kernel_args=None, wait=True, **dargs):
    """
    Reboot the remote host.

    Args:
        timeout - How long to wait for the reboot.
        label - The label we should boot into. If it's LAST_BOOT_TAG,
                we'll boot into whichever kernel was .boot'ed last
                (or the default kernel if we haven't .boot'ed in this
                job, or if there is no job at all). If it's None,
                we'll boot into the default kernel. If it's something
                else, we'll boot into that.
        kernel_args - Extra arguments added with bootloader.add_args
                to the entry we boot into. When given without a
                label, the bootloader's default entry is used.
        wait - Should we wait to see if the machine comes back up.
        **dargs - Passed on to reboot_setup and reboot_followup.

    Raises:
        error.AutoservRunError - re-raised when the reboot command
                itself fails to run.
    """
    # LAST_BOOT_TAG is a sentinel object, so it is matched by identity.
    # It must never reach reboot_setup or the bootloader: without a job
    # there is no remembered label, so fall back to the default kernel.
    if label is self.LAST_BOOT_TAG:
        if self.job:
            label = self.job.last_boot_tag
        else:
            label = None
    elif self.job:
        self.job.last_boot_tag = label

    self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

    if label or kernel_args:
        self.bootloader.install_boottool()
        if not label:
            # kernel_args without a label: use the default boot entry
            default = int(self.bootloader.get_default())
            label = self.bootloader.get_titles()[default]
        self.bootloader.boot_once(label)
        if kernel_args:
            self.bootloader.add_args(label, kernel_args)

    # define a function for the reboot and run it in a group
    print("Reboot: initiating reboot")
    def reboot():
        self.record("GOOD", None, "reboot.start")
        try:
            # delayed and detached so the command can return before
            # the machine goes down
            self.run('(sleep 5; reboot) '
                     '</dev/null >/dev/null 2>&1 &')
        except error.AutoservRunError:
            self.record("ABORT", None, "reboot.start",
                        "reboot command failed")
            raise
        if wait:
            self.wait_for_restart(timeout)
            self.reboot_followup(**dargs)

    # if this is a full reboot-and-wait, run the reboot inside a group
    if wait:
        self.log_reboot(reboot)
    else:
        reboot()
130
131
def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
    """
    Wait for the host to come back from a reboot.

    The waiting itself is done by the parent class implementation;
    this wrapper hands it to log_reboot so that it runs as a reboot
    group.

    Args:
        timeout - passed to the parent wait_for_restart
    """
    def reboot_func():
        parent = super(RemoteHost, self)
        parent.wait_for_restart(timeout=timeout)

    self.log_reboot(reboot_func)
140
141
def get_tmp_dir(self):
    """
    Create a fresh temporary directory on the host with mktemp and
    return its pathname.

    The pathname is also recorded in self.tmp_dirs, so the directory
    and its content are removed when the Host object that created it
    is destroyed.
    """
    mktemp_result = self.run("mktemp -d /tmp/autoserv-XXXXXX")
    tmp_path = mktemp_result.stdout.rstrip()
    self.tmp_dirs.append(tmp_path)
    return tmp_path
154
155
def ping(self):
    """
    Run /usr/bin/fping against self.hostname and return True when the
    command exits with status 0, False otherwise.
    """
    fping_cmd = "/usr/bin/fping -q %s" % (self.hostname,)
    exit_code = utils.system(fping_cmd, ignore_status=1)
    reachable = (exit_code == 0)
    return reachable
163
164
def check_uptime(self):
    """
    Read /proc/uptime on the host and return its first field as a
    string.

    Raises error.AutoservHostError when the host does not answer a
    ping.
    """
    if not self.ping():
        raise error.AutoservHostError('Client is not pingable')
    # the second positional argument (30) is passed through to run()
    uptime_output = self.run("/bin/cat /proc/uptime", 30).stdout
    fields = uptime_output.strip().split()
    return fields[0]
173
174
def get_crashinfo(self, test_start_time):
    """
    Collect crash information from the host.

    Runs the parent class collection first, then waits up to four
    hours for the host to come back up. If it does, a fixed list of
    log files and the output of dmesg are saved into a
    crashinfo.<hostname> directory, located under the job's resultdir
    when there is a job and under the current directory otherwise.
    A failure to collect one item is printed and does not stop the
    collection of the others.

    Args:
        test_start_time - passed through to the parent get_crashinfo
    """
    print("Collecting crash information...")
    super(RemoteHost, self).get_crashinfo(test_start_time)

    # wait for four hours, to see if the machine comes back up
    current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
    print("Waiting four hours for %s to come up (%s)" % (self.hostname,
                                                         current_time))
    if not self.wait_up(timeout=4*60*60):
        print("%s down, unable to collect crash info" % self.hostname)
        return
    print("%s is back up, collecting crash info" % self.hostname)

    # find a directory to put the crashinfo into
    if self.job:
        infodir = self.job.resultdir
    else:
        infodir = os.path.abspath(os.getcwd())
    infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
    if not os.path.exists(infodir):
        os.mkdir(infodir)

    # collect various log files
    log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
    for log in log_files:
        print("Collecting %s..." % log)
        try:
            self.get_file(log, infodir)
        except Exception as e:
            # best effort: report the failure and move on to the next log
            print("crashinfo collection of %s failed with:\n%s" % (log, e))

    # collect dmesg
    print("Collecting dmesg...")
    try:
        result = self.run("dmesg").stdout
        # the with block closes the file even when the write fails
        with open(os.path.join(infodir, "dmesg"), "w") as dmesg_file:
            dmesg_file.write(result)
    except Exception as e:
        print("crashinfo collection of dmesg failed with:\n%s" % e)
214
215
def are_wait_up_processes_up(self):
    """
    Check whether any of the host's wait-up processes is running yet
    on the remote host.

    The process names come from get_wait_up_processes(). Returns True
    if at least one of them is found in the host's process list, or
    if there are no names to look for; returns False otherwise.
    """
    wait_up_names = self.get_wait_up_processes()
    if not wait_up_names:
        return True # wait up processes aren't being used

    def is_running(name):
        # grep exits with status 0 only when the name was found
        ps_grep = "{ ps -e || ps; } | grep '%s'" % name
        return self.run(ps_grep, ignore_status=True).exit_status == 0

    # any() stops at the first process that is found
    return any(is_running(name) for name in wait_up_names)