blob: 0df6b41904319aa7955a505b904cbe4731eba076 [file] [log] [blame]
Fang Deng3af66202013-08-16 15:19:25 -07001import os, time, types, socket, shutil, glob, logging, traceback, tempfile
mblighefccc1b2010-01-11 19:08:42 +00002from autotest_lib.client.common_lib import autotemp, error, logging_manager
jadmanski31c49b72008-10-27 20:44:48 +00003from autotest_lib.server import utils, autotest
mblighe8b93af2009-01-30 00:45:53 +00004from autotest_lib.server.hosts import remote
mblighefccc1b2010-01-11 19:08:42 +00005from autotest_lib.client.common_lib.global_config import global_config
jadmanskica7da372008-10-21 16:26:52 +00006
7
# Shorthand for reading settings from the global autotest configuration.
get_value = global_config.get_config_value
# Switch read from the AUTOSERV config section; start_master_ssh() does
# nothing unless this is true.
enable_master_ssh = get_value(
    'AUTOSERV', 'enable_master_ssh', type=bool, default=False)
mblighefccc1b2010-01-11 19:08:42 +000011
12
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070013def _make_ssh_cmd_default(user="root", port=22, opts='', hosts_file='/dev/null',
14 connect_timeout=30, alive_interval=300):
lmraf676f32010-02-04 03:36:26 +000015 base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no "
16 "-o UserKnownHostsFile=%s -o BatchMode=yes "
mblighefccc1b2010-01-11 19:08:42 +000017 "-o ConnectTimeout=%d -o ServerAliveInterval=%d "
jadmanskica7da372008-10-21 16:26:52 +000018 "-l %s -p %d")
19 assert isinstance(connect_timeout, (int, long))
20 assert connect_timeout > 0 # can't disable the timeout
lmraf676f32010-02-04 03:36:26 +000021 return base_command % (opts, hosts_file, connect_timeout,
22 alive_interval, user, port)
jadmanskica7da372008-10-21 16:26:52 +000023
24
# Look up a site-specific make_ssh_command in the site_host module; the
# last argument (_make_ssh_cmd_default) is presumably the fallback used
# when the site module does not provide one.
make_ssh_command = utils.import_site_function(
    __file__,
    "autotest_lib.server.hosts.site_host",
    "make_ssh_command",
    _make_ssh_cmd_default)


# import site specific Host class (remote.RemoteHost is presumably the
# fallback base class when no site class exists)
SiteHost = utils.import_site_class(
    __file__,
    "autotest_lib.server.hosts.site_host",
    "SiteHost",
    remote.RemoteHost)
34
35
36class AbstractSSHHost(SiteHost):
mblighbc9402b2009-12-29 01:15:34 +000037 """
38 This class represents a generic implementation of most of the
jadmanskica7da372008-10-21 16:26:52 +000039 framework necessary for controlling a host via ssh. It implements
40 almost all of the abstract Host methods, except for the core
mblighbc9402b2009-12-29 01:15:34 +000041 Host.run method.
42 """
jadmanskica7da372008-10-21 16:26:52 +000043
jadmanskif6562912008-10-21 17:59:01 +000044 def _initialize(self, hostname, user="root", port=22, password="",
45 *args, **dargs):
46 super(AbstractSSHHost, self)._initialize(hostname=hostname,
47 *args, **dargs)
mbligh6369cf22008-10-24 17:21:57 +000048 self.ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
jadmanskica7da372008-10-21 16:26:52 +000049 self.user = user
50 self.port = port
51 self.password = password
showard6eafb492010-01-15 20:29:06 +000052 self._use_rsync = None
Fang Deng3af66202013-08-16 15:19:25 -070053 self.known_hosts_file = tempfile.mkstemp()[1]
jadmanskica7da372008-10-21 16:26:52 +000054
mblighefccc1b2010-01-11 19:08:42 +000055 """
56 Master SSH connection background job, socket temp directory and socket
57 control path option. If master-SSH is enabled, these fields will be
58 initialized by start_master_ssh when a new SSH connection is initiated.
59 """
60 self.master_ssh_job = None
61 self.master_ssh_tempdir = None
62 self.master_ssh_option = ''
63
showard6eafb492010-01-15 20:29:06 +000064
65 def use_rsync(self):
66 if self._use_rsync is not None:
67 return self._use_rsync
68
mblighc9892c02010-01-06 19:02:16 +000069 # Check if rsync is available on the remote host. If it's not,
70 # don't try to use it for any future file transfers.
showard6eafb492010-01-15 20:29:06 +000071 self._use_rsync = self._check_rsync()
72 if not self._use_rsync:
mblighc9892c02010-01-06 19:02:16 +000073 logging.warn("rsync not available on remote host %s -- disabled",
74 self.hostname)
Eric Lie0493a42010-11-15 13:05:43 -080075 return self._use_rsync
mblighc9892c02010-01-06 19:02:16 +000076
77
78 def _check_rsync(self):
79 """
80 Check if rsync is available on the remote host.
81 """
82 try:
83 self.run("rsync --version", stdout_tee=None, stderr_tee=None)
84 except error.AutoservRunError:
85 return False
86 return True
87
jadmanskica7da372008-10-21 16:26:52 +000088
showard56176ec2009-10-28 19:52:30 +000089 def _encode_remote_paths(self, paths, escape=True):
mblighbc9402b2009-12-29 01:15:34 +000090 """
91 Given a list of file paths, encodes it as a single remote path, in
92 the style used by rsync and scp.
93 """
showard56176ec2009-10-28 19:52:30 +000094 if escape:
95 paths = [utils.scp_remote_escape(path) for path in paths]
96 return '%s@%s:"%s"' % (self.user, self.hostname, " ".join(paths))
jadmanskica7da372008-10-21 16:26:52 +000097
jadmanskica7da372008-10-21 16:26:52 +000098
mbligh45561782009-05-11 21:14:34 +000099 def _make_rsync_cmd(self, sources, dest, delete_dest, preserve_symlinks):
mblighbc9402b2009-12-29 01:15:34 +0000100 """
101 Given a list of source paths and a destination path, produces the
jadmanskid7b79ed2009-01-07 17:19:48 +0000102 appropriate rsync command for copying them. Remote paths must be
mblighbc9402b2009-12-29 01:15:34 +0000103 pre-encoded.
104 """
lmraf676f32010-02-04 03:36:26 +0000105 ssh_cmd = make_ssh_command(user=self.user, port=self.port,
106 opts=self.master_ssh_option,
Fang Deng3af66202013-08-16 15:19:25 -0700107 hosts_file=self.known_hosts_file)
jadmanskid7b79ed2009-01-07 17:19:48 +0000108 if delete_dest:
109 delete_flag = "--delete"
110 else:
111 delete_flag = ""
mbligh45561782009-05-11 21:14:34 +0000112 if preserve_symlinks:
113 symlink_flag = ""
114 else:
115 symlink_flag = "-L"
116 command = "rsync %s %s --timeout=1800 --rsh='%s' -az %s %s"
117 return command % (symlink_flag, delete_flag, ssh_cmd,
118 " ".join(sources), dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000119
120
Eric Li861b2d52011-02-04 14:50:35 -0800121 def _make_ssh_cmd(self, cmd):
122 """
123 Create a base ssh command string for the host which can be used
124 to run commands directly on the machine
125 """
126 base_cmd = make_ssh_command(user=self.user, port=self.port,
127 opts=self.master_ssh_option,
Fang Deng3af66202013-08-16 15:19:25 -0700128 hosts_file=self.known_hosts_file)
Eric Li861b2d52011-02-04 14:50:35 -0800129
130 return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))
131
jadmanskid7b79ed2009-01-07 17:19:48 +0000132 def _make_scp_cmd(self, sources, dest):
mblighbc9402b2009-12-29 01:15:34 +0000133 """
134 Given a list of source paths and a destination path, produces the
jadmanskid7b79ed2009-01-07 17:19:48 +0000135 appropriate scp command for encoding it. Remote paths must be
mblighbc9402b2009-12-29 01:15:34 +0000136 pre-encoded.
137 """
mblighc0649d62010-01-15 18:15:58 +0000138 command = ("scp -rq %s -o StrictHostKeyChecking=no "
lmraf676f32010-02-04 03:36:26 +0000139 "-o UserKnownHostsFile=%s -P %d %s '%s'")
Fang Deng3af66202013-08-16 15:19:25 -0700140 return command % (self.master_ssh_option, self.known_hosts_file,
mblighefccc1b2010-01-11 19:08:42 +0000141 self.port, " ".join(sources), dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000142
143
144 def _make_rsync_compatible_globs(self, path, is_local):
mblighbc9402b2009-12-29 01:15:34 +0000145 """
146 Given an rsync-style path, returns a list of globbed paths
jadmanskid7b79ed2009-01-07 17:19:48 +0000147 that will hopefully provide equivalent behaviour for scp. Does not
148 support the full range of rsync pattern matching behaviour, only that
149 exposed in the get/send_file interface (trailing slashes).
150
151 The is_local param is flag indicating if the paths should be
mblighbc9402b2009-12-29 01:15:34 +0000152 interpreted as local or remote paths.
153 """
jadmanskid7b79ed2009-01-07 17:19:48 +0000154
155 # non-trailing slash paths should just work
156 if len(path) == 0 or path[-1] != "/":
157 return [path]
158
159 # make a function to test if a pattern matches any files
160 if is_local:
showard56176ec2009-10-28 19:52:30 +0000161 def glob_matches_files(path, pattern):
162 return len(glob.glob(path + pattern)) > 0
jadmanskid7b79ed2009-01-07 17:19:48 +0000163 else:
showard56176ec2009-10-28 19:52:30 +0000164 def glob_matches_files(path, pattern):
165 result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
166 pattern),
167 stdout_tee=None, ignore_status=True)
jadmanskid7b79ed2009-01-07 17:19:48 +0000168 return result.exit_status == 0
169
170 # take a set of globs that cover all files, and see which are needed
171 patterns = ["*", ".[!.]*"]
showard56176ec2009-10-28 19:52:30 +0000172 patterns = [p for p in patterns if glob_matches_files(path, p)]
jadmanskid7b79ed2009-01-07 17:19:48 +0000173
174 # convert them into a set of paths suitable for the commandline
jadmanskid7b79ed2009-01-07 17:19:48 +0000175 if is_local:
showard56176ec2009-10-28 19:52:30 +0000176 return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
177 for pattern in patterns]
jadmanskid7b79ed2009-01-07 17:19:48 +0000178 else:
showard56176ec2009-10-28 19:52:30 +0000179 return [utils.scp_remote_escape(path) + pattern
180 for pattern in patterns]
jadmanskid7b79ed2009-01-07 17:19:48 +0000181
182
183 def _make_rsync_compatible_source(self, source, is_local):
mblighbc9402b2009-12-29 01:15:34 +0000184 """
185 Applies the same logic as _make_rsync_compatible_globs, but
jadmanskid7b79ed2009-01-07 17:19:48 +0000186 applies it to an entire list of sources, producing a new list of
mblighbc9402b2009-12-29 01:15:34 +0000187 sources, properly quoted.
188 """
jadmanskid7b79ed2009-01-07 17:19:48 +0000189 return sum((self._make_rsync_compatible_globs(path, is_local)
190 for path in source), [])
jadmanskica7da372008-10-21 16:26:52 +0000191
192
mblighfeac0102009-04-28 18:31:12 +0000193 def _set_umask_perms(self, dest):
mblighbc9402b2009-12-29 01:15:34 +0000194 """
195 Given a destination file/dir (recursively) set the permissions on
196 all the files and directories to the max allowed by running umask.
197 """
mblighfeac0102009-04-28 18:31:12 +0000198
199 # now this looks strange but I haven't found a way in Python to _just_
200 # get the umask, apparently the only option is to try to set it
201 umask = os.umask(0)
202 os.umask(umask)
203
204 max_privs = 0777 & ~umask
205
206 def set_file_privs(filename):
Chris Masone567d0d92011-12-19 09:38:30 -0800207 """Sets mode of |filename|. Assumes |filename| exists."""
208 file_stat = os.stat(filename)
mblighfeac0102009-04-28 18:31:12 +0000209
210 file_privs = max_privs
211 # if the original file permissions do not have at least one
212 # executable bit then do not set it anywhere
213 if not file_stat.st_mode & 0111:
214 file_privs &= ~0111
215
216 os.chmod(filename, file_privs)
217
218 # try a bottom-up walk so changes on directory permissions won't cut
219 # our access to the files/directories inside it
220 for root, dirs, files in os.walk(dest, topdown=False):
221 # when setting the privileges we emulate the chmod "X" behaviour
222 # that sets to execute only if it is a directory or any of the
223 # owner/group/other already has execute right
224 for dirname in dirs:
225 os.chmod(os.path.join(root, dirname), max_privs)
226
Chris Masone567d0d92011-12-19 09:38:30 -0800227 # Filter out broken symlinks as we go.
228 for filename in filter(os.path.exists, files):
mblighfeac0102009-04-28 18:31:12 +0000229 set_file_privs(os.path.join(root, filename))
230
231
232 # now set privs for the dest itself
233 if os.path.isdir(dest):
234 os.chmod(dest, max_privs)
235 else:
236 set_file_privs(dest)
237
238
mbligh45561782009-05-11 21:14:34 +0000239 def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
240 preserve_symlinks=False):
jadmanskica7da372008-10-21 16:26:52 +0000241 """
242 Copy files from the remote host to a local path.
243
244 Directories will be copied recursively.
245 If a source component is a directory with a trailing slash,
246 the content of the directory will be copied, otherwise, the
247 directory itself and its content will be copied. This
248 behavior is similar to that of the program 'rsync'.
249
250 Args:
251 source: either
252 1) a single file or directory, as a string
253 2) a list of one or more (possibly mixed)
254 files or directories
255 dest: a file or a directory (if source contains a
256 directory or more than one element, you must
257 supply a directory dest)
mbligh89e258d2008-10-24 13:58:08 +0000258 delete_dest: if this is true, the command will also clear
259 out any old files at dest that are not in the
260 source
mblighfeac0102009-04-28 18:31:12 +0000261 preserve_perm: tells get_file() to try to preserve the sources
262 permissions on files and dirs
mbligh45561782009-05-11 21:14:34 +0000263 preserve_symlinks: try to preserve symlinks instead of
264 transforming them into files/dirs on copy
jadmanskica7da372008-10-21 16:26:52 +0000265
266 Raises:
267 AutoservRunError: the scp command failed
268 """
mblighefccc1b2010-01-11 19:08:42 +0000269
270 # Start a master SSH connection if necessary.
271 self.start_master_ssh()
272
jadmanskica7da372008-10-21 16:26:52 +0000273 if isinstance(source, basestring):
274 source = [source]
jadmanskid7b79ed2009-01-07 17:19:48 +0000275 dest = os.path.abspath(dest)
jadmanskica7da372008-10-21 16:26:52 +0000276
mblighc9892c02010-01-06 19:02:16 +0000277 # If rsync is disabled or fails, try scp.
showard6eafb492010-01-15 20:29:06 +0000278 try_scp = True
279 if self.use_rsync():
mblighc9892c02010-01-06 19:02:16 +0000280 try:
281 remote_source = self._encode_remote_paths(source)
282 local_dest = utils.sh_escape(dest)
283 rsync = self._make_rsync_cmd([remote_source], local_dest,
284 delete_dest, preserve_symlinks)
285 utils.run(rsync)
showard6eafb492010-01-15 20:29:06 +0000286 try_scp = False
mblighc9892c02010-01-06 19:02:16 +0000287 except error.CmdError, e:
288 logging.warn("trying scp, rsync failed: %s" % e)
mblighc9892c02010-01-06 19:02:16 +0000289
290 if try_scp:
jadmanskid7b79ed2009-01-07 17:19:48 +0000291 # scp has no equivalent to --delete, just drop the entire dest dir
292 if delete_dest and os.path.isdir(dest):
293 shutil.rmtree(dest)
294 os.mkdir(dest)
jadmanskica7da372008-10-21 16:26:52 +0000295
jadmanskid7b79ed2009-01-07 17:19:48 +0000296 remote_source = self._make_rsync_compatible_source(source, False)
297 if remote_source:
showard56176ec2009-10-28 19:52:30 +0000298 # _make_rsync_compatible_source() already did the escaping
299 remote_source = self._encode_remote_paths(remote_source,
300 escape=False)
jadmanskid7b79ed2009-01-07 17:19:48 +0000301 local_dest = utils.sh_escape(dest)
jadmanski2583a432009-02-10 23:59:11 +0000302 scp = self._make_scp_cmd([remote_source], local_dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000303 try:
304 utils.run(scp)
305 except error.CmdError, e:
306 raise error.AutoservRunError(e.args[0], e.args[1])
jadmanskica7da372008-10-21 16:26:52 +0000307
mblighfeac0102009-04-28 18:31:12 +0000308 if not preserve_perm:
309 # we have no way to tell scp to not try to preserve the
310 # permissions so set them after copy instead.
311 # for rsync we could use "--no-p --chmod=ugo=rwX" but those
312 # options are only in very recent rsync versions
313 self._set_umask_perms(dest)
314
jadmanskica7da372008-10-21 16:26:52 +0000315
mbligh45561782009-05-11 21:14:34 +0000316 def send_file(self, source, dest, delete_dest=False,
317 preserve_symlinks=False):
jadmanskica7da372008-10-21 16:26:52 +0000318 """
319 Copy files from a local path to the remote host.
320
321 Directories will be copied recursively.
322 If a source component is a directory with a trailing slash,
323 the content of the directory will be copied, otherwise, the
324 directory itself and its content will be copied. This
325 behavior is similar to that of the program 'rsync'.
326
327 Args:
328 source: either
329 1) a single file or directory, as a string
330 2) a list of one or more (possibly mixed)
331 files or directories
332 dest: a file or a directory (if source contains a
333 directory or more than one element, you must
334 supply a directory dest)
mbligh89e258d2008-10-24 13:58:08 +0000335 delete_dest: if this is true, the command will also clear
336 out any old files at dest that are not in the
337 source
mbligh45561782009-05-11 21:14:34 +0000338 preserve_symlinks: controls if symlinks on the source will be
339 copied as such on the destination or transformed into the
340 referenced file/directory
jadmanskica7da372008-10-21 16:26:52 +0000341
342 Raises:
343 AutoservRunError: the scp command failed
344 """
mblighefccc1b2010-01-11 19:08:42 +0000345
346 # Start a master SSH connection if necessary.
347 self.start_master_ssh()
348
jadmanskica7da372008-10-21 16:26:52 +0000349 if isinstance(source, basestring):
350 source = [source]
jadmanski2583a432009-02-10 23:59:11 +0000351 remote_dest = self._encode_remote_paths([dest])
jadmanskica7da372008-10-21 16:26:52 +0000352
mblighc9892c02010-01-06 19:02:16 +0000353 # If rsync is disabled or fails, try scp.
showard6eafb492010-01-15 20:29:06 +0000354 try_scp = True
355 if self.use_rsync():
mblighc9892c02010-01-06 19:02:16 +0000356 try:
357 local_sources = [utils.sh_escape(path) for path in source]
358 rsync = self._make_rsync_cmd(local_sources, remote_dest,
359 delete_dest, preserve_symlinks)
360 utils.run(rsync)
showard6eafb492010-01-15 20:29:06 +0000361 try_scp = False
mblighc9892c02010-01-06 19:02:16 +0000362 except error.CmdError, e:
363 logging.warn("trying scp, rsync failed: %s" % e)
mblighc9892c02010-01-06 19:02:16 +0000364
365 if try_scp:
jadmanskid7b79ed2009-01-07 17:19:48 +0000366 # scp has no equivalent to --delete, just drop the entire dest dir
367 if delete_dest:
showard27160152009-07-15 14:28:42 +0000368 is_dir = self.run("ls -d %s/" % dest,
jadmanskid7b79ed2009-01-07 17:19:48 +0000369 ignore_status=True).exit_status == 0
370 if is_dir:
371 cmd = "rm -rf %s && mkdir %s"
mbligh5a0ca532009-08-03 16:44:34 +0000372 cmd %= (dest, dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000373 self.run(cmd)
jadmanskica7da372008-10-21 16:26:52 +0000374
jadmanski2583a432009-02-10 23:59:11 +0000375 local_sources = self._make_rsync_compatible_source(source, True)
376 if local_sources:
377 scp = self._make_scp_cmd(local_sources, remote_dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000378 try:
379 utils.run(scp)
380 except error.CmdError, e:
381 raise error.AutoservRunError(e.args[0], e.args[1])
382
jadmanskica7da372008-10-21 16:26:52 +0000383
384 def ssh_ping(self, timeout=60):
beepsadd66d32013-03-04 17:21:51 -0800385 """
386 Pings remote host via ssh.
387
388 @param timeout: Time in seconds before giving up.
389 Defaults to 60 seconds.
390 @raise AutoservSSHTimeout: If the ssh ping times out.
391 @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
392 permissions.
393 @raise AutoservSshPingHostError: For other AutoservRunErrors.
394 """
jadmanskica7da372008-10-21 16:26:52 +0000395 try:
396 self.run("true", timeout=timeout, connect_timeout=timeout)
397 except error.AutoservSSHTimeout:
mblighd0e94982009-07-11 00:15:18 +0000398 msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
jadmanskica7da372008-10-21 16:26:52 +0000399 raise error.AutoservSSHTimeout(msg)
mbligh9d738d62009-03-09 21:17:10 +0000400 except error.AutoservSshPermissionDeniedError:
401 #let AutoservSshPermissionDeniedError be visible to the callers
402 raise
jadmanskica7da372008-10-21 16:26:52 +0000403 except error.AutoservRunError, e:
mblighc971c5f2009-06-08 16:48:54 +0000404 # convert the generic AutoservRunError into something more
405 # specific for this context
406 raise error.AutoservSshPingHostError(e.description + '\n' +
407 repr(e.result_obj))
jadmanskica7da372008-10-21 16:26:52 +0000408
409
beepsadd66d32013-03-04 17:21:51 -0800410 def is_up(self, timeout=60):
jadmanskica7da372008-10-21 16:26:52 +0000411 """
412 Check if the remote host is up.
413
beepsadd66d32013-03-04 17:21:51 -0800414 @param timeout: timeout in seconds.
415 @returns True if the remote host is up before the timeout expires,
416 False otherwise.
jadmanskica7da372008-10-21 16:26:52 +0000417 """
418 try:
beepsadd66d32013-03-04 17:21:51 -0800419 self.ssh_ping(timeout=timeout)
jadmanskica7da372008-10-21 16:26:52 +0000420 except error.AutoservError:
421 return False
422 else:
423 return True
424
425
426 def wait_up(self, timeout=None):
427 """
428 Wait until the remote host is up or the timeout expires.
429
430 In fact, it will wait until an ssh connection to the remote
431 host can be established, and getty is running.
432
jadmanskic0354912010-01-12 15:57:29 +0000433 @param timeout time limit in seconds before returning even
434 if the host is not up.
jadmanskica7da372008-10-21 16:26:52 +0000435
beepsadd66d32013-03-04 17:21:51 -0800436 @returns True if the host was found to be up before the timeout expires,
437 False otherwise
jadmanskica7da372008-10-21 16:26:52 +0000438 """
439 if timeout:
440 end_time = time.time() + timeout
beepsadd66d32013-03-04 17:21:51 -0800441 current_time = time.time()
jadmanskica7da372008-10-21 16:26:52 +0000442
beepsadd66d32013-03-04 17:21:51 -0800443 while not timeout or current_time < end_time:
444 if self.is_up(timeout=end_time - current_time):
jadmanskica7da372008-10-21 16:26:52 +0000445 try:
446 if self.are_wait_up_processes_up():
jadmanski7ebac3d2010-06-17 16:06:31 +0000447 logging.debug('Host %s is now up', self.hostname)
jadmanskica7da372008-10-21 16:26:52 +0000448 return True
449 except error.AutoservError:
450 pass
451 time.sleep(1)
beepsadd66d32013-03-04 17:21:51 -0800452 current_time = time.time()
jadmanskica7da372008-10-21 16:26:52 +0000453
jadmanski7ebac3d2010-06-17 16:06:31 +0000454 logging.debug('Host %s is still down after waiting %d seconds',
455 self.hostname, int(timeout + time.time() - end_time))
jadmanskica7da372008-10-21 16:26:52 +0000456 return False
457
458
jadmanskic0354912010-01-12 15:57:29 +0000459 def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
jadmanskica7da372008-10-21 16:26:52 +0000460 """
461 Wait until the remote host is down or the timeout expires.
462
jadmanskic0354912010-01-12 15:57:29 +0000463 If old_boot_id is provided, this will wait until either the machine
464 is unpingable or self.get_boot_id() returns a value different from
465 old_boot_id. If the boot_id value has changed then the function
466 returns true under the assumption that the machine has shut down
467 and has now already come back up.
jadmanskica7da372008-10-21 16:26:52 +0000468
jadmanskic0354912010-01-12 15:57:29 +0000469 If old_boot_id is None then until the machine becomes unreachable the
470 method assumes the machine has not yet shut down.
jadmanskica7da372008-10-21 16:26:52 +0000471
beepsadd66d32013-03-04 17:21:51 -0800472 Based on this definition, the 4 possible permutations of timeout
473 and old_boot_id are:
474 1. timeout and old_boot_id: wait timeout seconds for either the
475 host to become unpingable, or the boot id
476 to change. In the latter case we've rebooted
477 and in the former case we've only shutdown,
478 but both cases return True.
479 2. only timeout: wait timeout seconds for the host to become unpingable.
480 If the host remains pingable throughout timeout seconds
481 we return False.
482 3. only old_boot_id: wait forever until either the host becomes
483 unpingable or the boot_id changes. Return true
484 when either of those conditions are met.
485 4. not timeout, not old_boot_id: wait forever till the host becomes
486 unpingable.
487
jadmanskic0354912010-01-12 15:57:29 +0000488 @param timeout Time limit in seconds before returning even
489 if the host is still up.
490 @param warning_timer Time limit in seconds that will generate
491 a warning if the host is not down yet.
492 @param old_boot_id A string containing the result of self.get_boot_id()
493 prior to the host being told to shut down. Can be None if this is
494 not available.
495
496 @returns True if the host was found to be down, False otherwise
jadmanskica7da372008-10-21 16:26:52 +0000497 """
mblighe5e3cf22010-05-27 23:33:14 +0000498 #TODO: there is currently no way to distinguish between knowing
499 #TODO: boot_id was unsupported and not knowing the boot_id.
mbligh2ed998f2009-04-08 21:03:47 +0000500 current_time = time.time()
jadmanskica7da372008-10-21 16:26:52 +0000501 if timeout:
mbligh2ed998f2009-04-08 21:03:47 +0000502 end_time = current_time + timeout
jadmanskica7da372008-10-21 16:26:52 +0000503
mbligh2ed998f2009-04-08 21:03:47 +0000504 if warning_timer:
505 warn_time = current_time + warning_timer
506
jadmanskic0354912010-01-12 15:57:29 +0000507 if old_boot_id is not None:
508 logging.debug('Host %s pre-shutdown boot_id is %s',
509 self.hostname, old_boot_id)
510
beepsadd66d32013-03-04 17:21:51 -0800511 # Impose semi real-time deadline constraints, since some clients
512 # (eg: watchdog timer tests) expect strict checking of time elapsed.
513 # Each iteration of this loop is treated as though it atomically
514 # completes within current_time, this is needed because if we used
515 # inline time.time() calls instead then the following could happen:
516 #
517 # while not timeout or time.time() < end_time: [23 < 30]
518 # some code. [takes 10 secs]
519 # try:
520 # new_boot_id = self.get_boot_id(timeout=end_time - time.time())
521 # [30 - 33]
522 # The last step will lead to a return True, when in fact the machine
523 # went down at 32 seconds (>30). Hence we need to pass get_boot_id
524 # the same time that allowed us into that iteration of the loop.
mbligh2ed998f2009-04-08 21:03:47 +0000525 while not timeout or current_time < end_time:
jadmanskic0354912010-01-12 15:57:29 +0000526 try:
beepsadd66d32013-03-04 17:21:51 -0800527 new_boot_id = self.get_boot_id(timeout=end_time - current_time)
mblighdbc7e4a2010-01-15 20:34:20 +0000528 except error.AutoservError:
jadmanskic0354912010-01-12 15:57:29 +0000529 logging.debug('Host %s is now unreachable over ssh, is down',
530 self.hostname)
jadmanskica7da372008-10-21 16:26:52 +0000531 return True
jadmanskic0354912010-01-12 15:57:29 +0000532 else:
533 # if the machine is up but the boot_id value has changed from
534 # old boot id, then we can assume the machine has gone down
535 # and then already come back up
536 if old_boot_id is not None and old_boot_id != new_boot_id:
537 logging.debug('Host %s now has boot_id %s and so must '
538 'have rebooted', self.hostname, new_boot_id)
539 return True
mbligh2ed998f2009-04-08 21:03:47 +0000540
541 if warning_timer and current_time > warn_time:
542 self.record("WARN", None, "shutdown",
543 "Shutdown took longer than %ds" % warning_timer)
544 # Print the warning only once.
545 warning_timer = None
mbligha4464402009-04-17 20:13:41 +0000546 # If a machine is stuck switching runlevels
547 # This may cause the machine to reboot.
548 self.run('kill -HUP 1', ignore_status=True)
mbligh2ed998f2009-04-08 21:03:47 +0000549
jadmanskica7da372008-10-21 16:26:52 +0000550 time.sleep(1)
mbligh2ed998f2009-04-08 21:03:47 +0000551 current_time = time.time()
jadmanskica7da372008-10-21 16:26:52 +0000552
553 return False
jadmanskif6562912008-10-21 17:59:01 +0000554
mbligha0a27592009-01-24 01:41:36 +0000555
jadmanskif6562912008-10-21 17:59:01 +0000556 # tunable constants for the verify & repair code
mblighb86bfa12010-02-12 20:22:21 +0000557 AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
558 "gb_diskspace_required",
Fang Deng6b05f5b2013-03-20 13:42:11 -0700559 type=float,
560 default=20.0)
mbligha0a27592009-01-24 01:41:36 +0000561
jadmanskif6562912008-10-21 17:59:01 +0000562
showardca572982009-09-18 21:20:01 +0000563 def verify_connectivity(self):
564 super(AbstractSSHHost, self).verify_connectivity()
jadmanskif6562912008-10-21 17:59:01 +0000565
showardb18134f2009-03-20 20:52:18 +0000566 logging.info('Pinging host ' + self.hostname)
jadmanskif6562912008-10-21 17:59:01 +0000567 self.ssh_ping()
mbligh2ba7ab02009-08-24 22:09:26 +0000568 logging.info("Host (ssh) %s is alive", self.hostname)
jadmanskif6562912008-10-21 17:59:01 +0000569
jadmanski80deb752009-01-21 17:14:16 +0000570 if self.is_shutting_down():
mblighc971c5f2009-06-08 16:48:54 +0000571 raise error.AutoservHostIsShuttingDownError("Host is shutting down")
jadmanski80deb752009-01-21 17:14:16 +0000572
mblighb49b5232009-02-12 21:54:49 +0000573
showardca572982009-09-18 21:20:01 +0000574 def verify_software(self):
575 super(AbstractSSHHost, self).verify_software()
jadmanskif6562912008-10-21 17:59:01 +0000576 try:
showardad812bf2009-10-20 23:49:56 +0000577 self.check_diskspace(autotest.Autotest.get_install_dir(self),
578 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
jadmanskif6562912008-10-21 17:59:01 +0000579 except error.AutoservHostError:
580 raise # only want to raise if it's a space issue
showardad812bf2009-10-20 23:49:56 +0000581 except autotest.AutodirNotFoundError:
showardca572982009-09-18 21:20:01 +0000582 # autotest dir may not exist, etc. ignore
583 logging.debug('autodir space check exception, this is probably '
584 'safe to ignore\n' + traceback.format_exc())
mblighefccc1b2010-01-11 19:08:42 +0000585
586
587 def close(self):
588 super(AbstractSSHHost, self).close()
589 self._cleanup_master_ssh()
Fang Deng3af66202013-08-16 15:19:25 -0700590 os.remove(self.known_hosts_file)
mblighefccc1b2010-01-11 19:08:42 +0000591
592
593 def _cleanup_master_ssh(self):
594 """
595 Release all resources (process, temporary directory) used by an active
596 master SSH connection.
597 """
598 # If a master SSH connection is running, kill it.
599 if self.master_ssh_job is not None:
600 utils.nuke_subprocess(self.master_ssh_job.sp)
601 self.master_ssh_job = None
602
603 # Remove the temporary directory for the master SSH socket.
604 if self.master_ssh_tempdir is not None:
605 self.master_ssh_tempdir.clean()
606 self.master_ssh_tempdir = None
607 self.master_ssh_option = ''
608
609
610 def start_master_ssh(self):
611 """
612 Called whenever a slave SSH connection needs to be initiated (e.g., by
613 run, rsync, scp). If master SSH support is enabled and a master SSH
614 connection is not active already, start a new one in the background.
615 Also, cleanup any zombie master SSH connections (e.g., dead due to
616 reboot).
617 """
618 if not enable_master_ssh:
619 return
620
621 # If a previously started master SSH connection is not running
622 # anymore, it needs to be cleaned up and then restarted.
623 if self.master_ssh_job is not None:
624 if self.master_ssh_job.sp.poll() is not None:
625 logging.info("Master ssh connection to %s is down.",
626 self.hostname)
627 self._cleanup_master_ssh()
628
629 # Start a new master SSH connection.
630 if self.master_ssh_job is None:
631 # Create a shared socket in a temp location.
632 self.master_ssh_tempdir = autotemp.tempdir(unique_id='ssh-master')
633 self.master_ssh_option = ("-o ControlPath=%s/socket" %
634 self.master_ssh_tempdir.name)
635
636 # Start the master SSH connection in the background.
mbligh5644c122010-01-29 17:43:26 +0000637 master_cmd = self.ssh_command(options="-N -o ControlMaster=yes")
mblighefccc1b2010-01-11 19:08:42 +0000638 logging.info("Starting master ssh connection '%s'" % master_cmd)
639 self.master_ssh_job = utils.BgJob(master_cmd)
mbligh0a883702010-04-21 01:58:34 +0000640
641
642 def clear_known_hosts(self):
643 """Clears out the temporary ssh known_hosts file.
644
645 This is useful if the test SSHes to the machine, then reinstalls it,
646 then SSHes to it again. It can be called after the reinstall to
647 reduce the spam in the logs.
648 """
649 logging.info("Clearing known hosts for host '%s', file '%s'.",
Fang Deng3af66202013-08-16 15:19:25 -0700650 self.hostname, self.known_hosts_file)
mbligh0a883702010-04-21 01:58:34 +0000651 # Clear out the file by opening it for writing and then closing.
Fang Deng3af66202013-08-16 15:19:25 -0700652 fh = open(self.known_hosts_file, "w")
mbligh0a883702010-04-21 01:58:34 +0000653 fh.close()