blob: 2c72321cf87e85f6b3c7c7c283b152879eae2afd [file] [log] [blame]
"""This module defines the RemoteHost class, mixing in the SiteHost class
if it is available."""
3
mblighf2c33762008-10-18 14:42:34 +00004import os, time
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
mbligh321b1f52008-04-09 16:23:43 +00008
9
jadmanski1c5e3a12008-08-15 23:08:20 +000010class RemoteHost(base_classes.Host):
jadmanskid60321a2008-10-28 20:32:05 +000011 """
12 This class represents a remote machine on which you can run
jadmanski0afbb632008-06-06 21:10:57 +000013 programs.
mbligh321b1f52008-04-09 16:23:43 +000014
jadmanski0afbb632008-06-06 21:10:57 +000015 It may be accessed through a network, a serial line, ...
16 It is not the machine autoserv is running on.
mbligh321b1f52008-04-09 16:23:43 +000017
jadmanski0afbb632008-06-06 21:10:57 +000018 Implementation details:
19 This is an abstract class, leaf subclasses must implement the methods
20 listed here and in parent classes which have no implementation. They
21 may reimplement methods which already have an implementation. You
22 must not instantiate this class but should instantiate one of those
jadmanskid60321a2008-10-28 20:32:05 +000023 leaf subclasses.
24 """
mbligh321b1f52008-04-09 16:23:43 +000025
mblighf2c33762008-10-18 14:42:34 +000026 DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
27 LAST_BOOT_TAG = object()
28
jadmanskif6562912008-10-21 17:59:01 +000029 def _initialize(self, hostname, autodir=None, *args, **dargs):
30 super(RemoteHost, self)._initialize(*args, **dargs)
mbligh321b1f52008-04-09 16:23:43 +000031
jadmanski1c5e3a12008-08-15 23:08:20 +000032 self.hostname = hostname
mblighf2c33762008-10-18 14:42:34 +000033 self.autodir = autodir
34 self.tmp_dirs = []
jadmanskia2db9412008-08-22 21:47:24 +000035
36
jadmanski53aaf382008-11-17 16:22:31 +000037 def close(self):
38 super(RemoteHost, self).close()
mblighf2c33762008-10-18 14:42:34 +000039 self.stop_loggers()
40
41 if hasattr(self, 'tmp_dirs'):
42 for dir in self.tmp_dirs:
43 try:
44 self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
45 except error.AutoservRunError:
46 pass
47
48
jadmanskid60321a2008-10-28 20:32:05 +000049 def job_start(self):
50 """
51 Abstract method, called the first time a remote host object
52 is created for a specific host after a job starts.
53
54 This method depends on the create_host factory being used to
55 construct your host object. If you directly construct host objects
56 you will need to call this method yourself (and enforce the
57 single-call rule).
58 """
59 pass
60
61
mblighf2c33762008-10-18 14:42:34 +000062 def get_autodir(self):
63 return self.autodir
64
65
66 def set_autodir(self, autodir):
jadmanskid60321a2008-10-28 20:32:05 +000067 """
mblighf2c33762008-10-18 14:42:34 +000068 This method is called to make the host object aware of the
69 where autotest is installed. Called in server/autotest.py
70 after a successful install
jadmanskid60321a2008-10-28 20:32:05 +000071 """
mblighf2c33762008-10-18 14:42:34 +000072 self.autodir = autodir
73
74
75 def sysrq_reboot(self):
76 self.run('echo b > /proc/sysrq-trigger &')
77
78
79 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
80 kernel_args=None, wait=True, **dargs):
81 """
82 Reboot the remote host.
83
84 Args:
85 timeout - How long to wait for the reboot.
86 label - The label we should boot into. If None, we will
87 boot into the default kernel. If it's LAST_BOOT_TAG,
88 we'll boot into whichever kernel was .boot'ed last
89 (or the default kernel if we haven't .boot'ed in this
90 job). If it's None, we'll boot into the default kernel.
91 If it's something else, we'll boot into that.
92 wait - Should we wait to see if the machine comes back up.
93 """
94 if self.job:
95 if label == self.LAST_BOOT_TAG:
96 label = self.job.last_boot_tag
97 else:
98 self.job.last_boot_tag = label
99
100 self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
101
102 if label or kernel_args:
103 self.bootloader.install_boottool()
104 if not label:
105 default = int(self.bootloader.get_default())
106 label = self.bootloader.get_titles()[default]
107 self.bootloader.boot_once(label)
108 if kernel_args:
109 self.bootloader.add_args(label, kernel_args)
110
111 # define a function for the reboot and run it in a group
112 print "Reboot: initiating reboot"
113 def reboot():
114 self.record("GOOD", None, "reboot.start")
115 try:
116 self.run('(sleep 5; reboot) '
117 '</dev/null >/dev/null 2>&1 &')
118 except error.AutoservRunError:
119 self.record("ABORT", None, "reboot.start",
120 "reboot command failed")
121 raise
122 if wait:
jadmanskid778ae42009-01-07 15:07:36 +0000123 self.wait_for_restart(timeout, **dargs)
mblighf2c33762008-10-18 14:42:34 +0000124
125 # if this is a full reboot-and-wait, run the reboot inside a group
126 if wait:
127 self.log_reboot(reboot)
128 else:
129 reboot()
130
131
jadmanski4f909252008-12-01 20:47:10 +0000132 def reboot_followup(self, *args, **dargs):
133 super(RemoteHost, self).reboot_followup(*args, **dargs)
134 if self.job:
135 self.job.profilers.handle_reboot(self)
136
137
jadmanskid778ae42009-01-07 15:07:36 +0000138 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
jadmanskid60321a2008-10-28 20:32:05 +0000139 """
140 Wait for the host to come back from a reboot. This wraps the
141 generic wait_for_restart implementation in a reboot group.
142 """
mblighf2c33762008-10-18 14:42:34 +0000143 def reboot_func():
jadmanskid778ae42009-01-07 15:07:36 +0000144 super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
mblighf2c33762008-10-18 14:42:34 +0000145 self.log_reboot(reboot_func)
146
147
mbligh1264b512008-11-05 22:21:49 +0000148 def cleanup(self):
149 super(RemoteHost, self).cleanup()
150 self.reboot()
151
152
mblighe48bcfb2008-11-11 17:09:44 +0000153 def get_tmp_dir(self, parent='/tmp'):
mblighf2c33762008-10-18 14:42:34 +0000154 """
155 Return the pathname of a directory on the host suitable
156 for temporary file storage.
157
158 The directory and its content will be deleted automatically
159 on the destruction of the Host object that was used to obtain
160 it.
161 """
jadmanski9f7dd112008-11-17 16:40:05 +0000162 self.run("mkdir -p %s" % parent)
mblighe48bcfb2008-11-11 17:09:44 +0000163 template = os.path.join(parent, 'autoserv-XXXXXX')
jadmanski9f7dd112008-11-17 16:40:05 +0000164 dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
mblighf2c33762008-10-18 14:42:34 +0000165 self.tmp_dirs.append(dir_name)
166 return dir_name
167
168
169 def ping(self):
170 """
171 Ping the remote system, and return whether it's available
172 """
173 fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
174 rc = utils.system(fpingcmd, ignore_status = 1)
175 return (rc == 0)
176
177
178 def check_uptime(self):
179 """
180 Check that uptime is available and monotonically increasing.
181 """
182 if not self.ping():
183 raise error.AutoservHostError('Client is not pingable')
184 result = self.run("/bin/cat /proc/uptime", 30)
185 return result.stdout.strip().split()[0]
186
187
188 def get_crashinfo(self, test_start_time):
189 print "Collecting crash information..."
190 super(RemoteHost, self).get_crashinfo(test_start_time)
191
192 # wait for four hours, to see if the machine comes back up
193 current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
194 print "Waiting four hours for %s to come up (%s)" % (self.hostname,
195 current_time)
196 if not self.wait_up(timeout=4*60*60):
197 print "%s down, unable to collect crash info" % self.hostname
198 return
199 else:
200 print "%s is back up, collecting crash info" % self.hostname
201
202 # find a directory to put the crashinfo into
203 if self.job:
204 infodir = self.job.resultdir
205 else:
206 infodir = os.path.abspath(os.getcwd())
207 infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
208 if not os.path.exists(infodir):
209 os.mkdir(infodir)
210
211 # collect various log files
212 log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
213 for log in log_files:
214 print "Collecting %s..." % log
215 try:
216 self.get_file(log, infodir)
217 except Exception, e:
218 print "crashinfo collection of %s failed with:\n%s" % (log, e)
219
220 # collect dmesg
221 print "Collecting dmesg..."
222 try:
223 result = self.run("dmesg").stdout
224 file(os.path.join(infodir, "dmesg"), "w").write(result)
225 except Exception, e:
226 print "crashinfo collection of dmesg failed with:\n%s" % e
227
228
jadmanskica7da372008-10-21 16:26:52 +0000229 def are_wait_up_processes_up(self):
mblighf2c33762008-10-18 14:42:34 +0000230 """
231 Checks if any HOSTS waitup processes are running yet on the
232 remote host.
233
234 Returns True if any the waitup processes are running, False
235 otherwise.
236 """
237 processes = self.get_wait_up_processes()
238 if len(processes) == 0:
239 return True # wait up processes aren't being used
240 for procname in processes:
241 exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
242 ignore_status=True).exit_status
243 if exit_status == 0:
244 return True
245 return False