blob: 7af5a2e6f764e938149ef0face27bbaee42ab809 [file] [log] [blame]
Aviv Keshet07f16242013-10-10 07:54:19 -07001import os, time, socket, shutil, glob, logging, traceback, tempfile
Aviv Keshet53a216a2013-08-27 13:58:46 -07002from autotest_lib.client.common_lib import autotemp, error
jadmanski31c49b72008-10-27 20:44:48 +00003from autotest_lib.server import utils, autotest
mblighe8b93af2009-01-30 00:45:53 +00004from autotest_lib.server.hosts import remote
mblighefccc1b2010-01-11 19:08:42 +00005from autotest_lib.client.common_lib.global_config import global_config
jadmanskica7da372008-10-21 16:26:52 +00006
Aviv Keshet53a216a2013-08-27 13:58:46 -07007# pylint: disable-msg=C0111
jadmanskica7da372008-10-21 16:26:52 +00008
# Module-level shortcut for reading settings from the global configuration.
get_value = global_config.get_config_value

# AUTOSERV/enable_master_ssh switch (False unless configured); when it is
# off, start_master_ssh() returns without doing anything.
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh',
                              type=bool, default=False)
mblighefccc1b2010-01-11 19:08:42 +000012
13
Fang Deng96667ca2013-08-01 17:46:18 -070014class AbstractSSHHost(remote.RemoteHost):
mblighbc9402b2009-12-29 01:15:34 +000015 """
16 This class represents a generic implementation of most of the
jadmanskica7da372008-10-21 16:26:52 +000017 framework necessary for controlling a host via ssh. It implements
18 almost all of the abstract Host methods, except for the core
mblighbc9402b2009-12-29 01:15:34 +000019 Host.run method.
20 """
jadmanskica7da372008-10-21 16:26:52 +000021
def _initialize(self, hostname, user="root", port=22, password="",
                *args, **dargs):
    """
    Set up the ssh-specific state of the host object.

    @param hostname: Name of the remote host. It is forwarded to the parent
            class and resolved to an address that is stored in self.ip.
    @param user: Login name used for ssh/scp/rsync connections.
    @param port: TCP port used for ssh/scp/rsync connections.
    @param password: Password, stored on the host object as given.
    @param args: Extra positional arguments for the parent _initialize.
    @param dargs: Extra keyword arguments for the parent _initialize.
    """
    super(AbstractSSHHost, self)._initialize(hostname=hostname,
                                             *args, **dargs)
    self.ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
    self.user = user
    self.port = port
    self.password = password
    # Tri-state cache for use_rsync(): None means "not probed yet".
    self._use_rsync = None

    # mkstemp() returns an *open* descriptor together with the path. Only
    # the path is used afterwards (it is passed to ssh/scp as the
    # UserKnownHostsFile), so close the descriptor immediately instead of
    # leaking one per host object.
    known_hosts_fd, self.known_hosts_file = tempfile.mkstemp()
    os.close(known_hosts_fd)

    # Master SSH connection background job, socket temp directory and
    # socket control path option. If master-SSH is enabled, these fields
    # will be initialized by start_master_ssh when a new SSH connection is
    # initiated.
    self.master_ssh_job = None
    self.master_ssh_tempdir = None
    self.master_ssh_option = ''
41
showard6eafb492010-01-15 20:29:06 +000042
def make_ssh_command(self, user="root", port=22, opts='',
                     hosts_file='/dev/null',
                     connect_timeout=30, alive_interval=300):
    """
    Build the ssh command-line prefix (everything up to, but not including,
    the host name) used to reach a host.

    @param user: Login name passed to ssh with -l.
    @param port: Port number passed to ssh with -p.
    @param opts: Extra ssh options inserted verbatim after "-a -x".
    @param hosts_file: Path used for the UserKnownHostsFile option.
    @param connect_timeout: Value for the ConnectTimeout option; must be a
            positive integer.
    @param alive_interval: Value for the ServerAliveInterval option.

    @returns The command prefix as a single string.
    @raise AssertionError: If connect_timeout is not a positive integer.
    """
    # numbers.Integral matches both int and long on Python 2 and int on
    # Python 3, so the check no longer depends on the Python-2-only `long`.
    import numbers

    base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no "
                    "-o UserKnownHostsFile=%s -o BatchMode=yes "
                    "-o ConnectTimeout=%d -o ServerAliveInterval=%d "
                    "-l %s -p %d")
    assert isinstance(connect_timeout, numbers.Integral)
    assert connect_timeout > 0  # can't disable the timeout
    return base_command % (opts, hosts_file, connect_timeout,
                           alive_interval, user, port)
54
55
def use_rsync(self):
    """
    Tell whether rsync should be used for file transfers with this host.

    The remote host is probed with _check_rsync() the first time this is
    called; the answer is cached in self._use_rsync and reused afterwards.

    @returns The cached or freshly probed availability of rsync.
    """
    if self._use_rsync is None:
        # First call: check if rsync is available on the remote host. If
        # it's not, don't try to use it for any future file transfers.
        self._use_rsync = self._check_rsync()
        if not self._use_rsync:
            logging.warning(
                    "rsync not available on remote host %s -- disabled",
                    self.hostname)
    return self._use_rsync
mblighc9892c02010-01-06 19:02:16 +000067
68
69 def _check_rsync(self):
70 """
71 Check if rsync is available on the remote host.
72 """
73 try:
74 self.run("rsync --version", stdout_tee=None, stderr_tee=None)
75 except error.AutoservRunError:
76 return False
77 return True
78
jadmanskica7da372008-10-21 16:26:52 +000079
showard56176ec2009-10-28 19:52:30 +000080 def _encode_remote_paths(self, paths, escape=True):
mblighbc9402b2009-12-29 01:15:34 +000081 """
82 Given a list of file paths, encodes it as a single remote path, in
83 the style used by rsync and scp.
84 """
showard56176ec2009-10-28 19:52:30 +000085 if escape:
86 paths = [utils.scp_remote_escape(path) for path in paths]
87 return '%s@%s:"%s"' % (self.user, self.hostname, " ".join(paths))
jadmanskica7da372008-10-21 16:26:52 +000088
jadmanskica7da372008-10-21 16:26:52 +000089
mbligh45561782009-05-11 21:14:34 +000090 def _make_rsync_cmd(self, sources, dest, delete_dest, preserve_symlinks):
mblighbc9402b2009-12-29 01:15:34 +000091 """
92 Given a list of source paths and a destination path, produces the
jadmanskid7b79ed2009-01-07 17:19:48 +000093 appropriate rsync command for copying them. Remote paths must be
mblighbc9402b2009-12-29 01:15:34 +000094 pre-encoded.
95 """
Fang Deng96667ca2013-08-01 17:46:18 -070096 ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
97 opts=self.master_ssh_option,
98 hosts_file=self.known_hosts_file)
jadmanskid7b79ed2009-01-07 17:19:48 +000099 if delete_dest:
100 delete_flag = "--delete"
101 else:
102 delete_flag = ""
mbligh45561782009-05-11 21:14:34 +0000103 if preserve_symlinks:
104 symlink_flag = ""
105 else:
106 symlink_flag = "-L"
107 command = "rsync %s %s --timeout=1800 --rsh='%s' -az %s %s"
108 return command % (symlink_flag, delete_flag, ssh_cmd,
109 " ".join(sources), dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000110
111
def _make_ssh_cmd(self, cmd):
    """
    Build a complete ssh command string that runs |cmd| on this host.

    @param cmd: The command to execute remotely; it is passed through
            utils.sh_escape and wrapped in double quotes.

    @returns The full ssh invocation as a single string.
    """
    ssh_prefix = self.make_ssh_command(user=self.user, port=self.port,
                                       opts=self.master_ssh_option,
                                       hosts_file=self.known_hosts_file)
    escaped_cmd = utils.sh_escape(cmd)
    return '%s %s "%s"' % (ssh_prefix, self.hostname, escaped_cmd)
122
jadmanskid7b79ed2009-01-07 17:19:48 +0000123 def _make_scp_cmd(self, sources, dest):
mblighbc9402b2009-12-29 01:15:34 +0000124 """
125 Given a list of source paths and a destination path, produces the
jadmanskid7b79ed2009-01-07 17:19:48 +0000126 appropriate scp command for encoding it. Remote paths must be
mblighbc9402b2009-12-29 01:15:34 +0000127 pre-encoded.
128 """
mblighc0649d62010-01-15 18:15:58 +0000129 command = ("scp -rq %s -o StrictHostKeyChecking=no "
lmraf676f32010-02-04 03:36:26 +0000130 "-o UserKnownHostsFile=%s -P %d %s '%s'")
Fang Deng3af66202013-08-16 15:19:25 -0700131 return command % (self.master_ssh_option, self.known_hosts_file,
mblighefccc1b2010-01-11 19:08:42 +0000132 self.port, " ".join(sources), dest)
jadmanskid7b79ed2009-01-07 17:19:48 +0000133
134
135 def _make_rsync_compatible_globs(self, path, is_local):
mblighbc9402b2009-12-29 01:15:34 +0000136 """
137 Given an rsync-style path, returns a list of globbed paths
jadmanskid7b79ed2009-01-07 17:19:48 +0000138 that will hopefully provide equivalent behaviour for scp. Does not
139 support the full range of rsync pattern matching behaviour, only that
140 exposed in the get/send_file interface (trailing slashes).
141
142 The is_local param is flag indicating if the paths should be
mblighbc9402b2009-12-29 01:15:34 +0000143 interpreted as local or remote paths.
144 """
jadmanskid7b79ed2009-01-07 17:19:48 +0000145
146 # non-trailing slash paths should just work
147 if len(path) == 0 or path[-1] != "/":
148 return [path]
149
150 # make a function to test if a pattern matches any files
151 if is_local:
showard56176ec2009-10-28 19:52:30 +0000152 def glob_matches_files(path, pattern):
153 return len(glob.glob(path + pattern)) > 0
jadmanskid7b79ed2009-01-07 17:19:48 +0000154 else:
showard56176ec2009-10-28 19:52:30 +0000155 def glob_matches_files(path, pattern):
156 result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
157 pattern),
158 stdout_tee=None, ignore_status=True)
jadmanskid7b79ed2009-01-07 17:19:48 +0000159 return result.exit_status == 0
160
161 # take a set of globs that cover all files, and see which are needed
162 patterns = ["*", ".[!.]*"]
showard56176ec2009-10-28 19:52:30 +0000163 patterns = [p for p in patterns if glob_matches_files(path, p)]
jadmanskid7b79ed2009-01-07 17:19:48 +0000164
165 # convert them into a set of paths suitable for the commandline
jadmanskid7b79ed2009-01-07 17:19:48 +0000166 if is_local:
showard56176ec2009-10-28 19:52:30 +0000167 return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
168 for pattern in patterns]
jadmanskid7b79ed2009-01-07 17:19:48 +0000169 else:
showard56176ec2009-10-28 19:52:30 +0000170 return [utils.scp_remote_escape(path) + pattern
171 for pattern in patterns]
jadmanskid7b79ed2009-01-07 17:19:48 +0000172
173
174 def _make_rsync_compatible_source(self, source, is_local):
mblighbc9402b2009-12-29 01:15:34 +0000175 """
176 Applies the same logic as _make_rsync_compatible_globs, but
jadmanskid7b79ed2009-01-07 17:19:48 +0000177 applies it to an entire list of sources, producing a new list of
mblighbc9402b2009-12-29 01:15:34 +0000178 sources, properly quoted.
179 """
jadmanskid7b79ed2009-01-07 17:19:48 +0000180 return sum((self._make_rsync_compatible_globs(path, is_local)
181 for path in source), [])
jadmanskica7da372008-10-21 16:26:52 +0000182
183
mblighfeac0102009-04-28 18:31:12 +0000184 def _set_umask_perms(self, dest):
mblighbc9402b2009-12-29 01:15:34 +0000185 """
186 Given a destination file/dir (recursively) set the permissions on
187 all the files and directories to the max allowed by running umask.
188 """
mblighfeac0102009-04-28 18:31:12 +0000189
190 # now this looks strange but I haven't found a way in Python to _just_
191 # get the umask, apparently the only option is to try to set it
192 umask = os.umask(0)
193 os.umask(umask)
194
195 max_privs = 0777 & ~umask
196
197 def set_file_privs(filename):
Chris Masone567d0d92011-12-19 09:38:30 -0800198 """Sets mode of |filename|. Assumes |filename| exists."""
199 file_stat = os.stat(filename)
mblighfeac0102009-04-28 18:31:12 +0000200
201 file_privs = max_privs
202 # if the original file permissions do not have at least one
203 # executable bit then do not set it anywhere
204 if not file_stat.st_mode & 0111:
205 file_privs &= ~0111
206
207 os.chmod(filename, file_privs)
208
209 # try a bottom-up walk so changes on directory permissions won't cut
210 # our access to the files/directories inside it
211 for root, dirs, files in os.walk(dest, topdown=False):
212 # when setting the privileges we emulate the chmod "X" behaviour
213 # that sets to execute only if it is a directory or any of the
214 # owner/group/other already has execute right
215 for dirname in dirs:
216 os.chmod(os.path.join(root, dirname), max_privs)
217
Chris Masone567d0d92011-12-19 09:38:30 -0800218 # Filter out broken symlinks as we go.
219 for filename in filter(os.path.exists, files):
mblighfeac0102009-04-28 18:31:12 +0000220 set_file_privs(os.path.join(root, filename))
221
222
223 # now set privs for the dest itself
224 if os.path.isdir(dest):
225 os.chmod(dest, max_privs)
226 else:
227 set_file_privs(dest)
228
229
def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
             preserve_symlinks=False):
    """
    Copy files from the remote host to a local path.

    Directories are copied recursively. A source directory written with a
    trailing slash contributes only its contents; without the slash the
    directory itself is copied as well (the convention rsync uses).
    rsync is attempted first when use_rsync() allows it, and scp is used
    if rsync is unavailable or its command fails.

    Args:
            source: either
                    1) a single file or directory, as a string
                    2) a list of one or more (possibly mixed)
                            files or directories
            dest: a file or a directory (if source contains a
                    directory or more than one element, you must
                    supply a directory dest)
            delete_dest: if this is true, the command will also clear
                         out any old files at dest that are not in the
                         source
            preserve_perm: tells get_file() to try to preserve the sources
                           permissions on files and dirs
            preserve_symlinks: try to preserve symlinks instead of
                               transforming them into files/dirs on copy

    Raises:
            AutoservRunError: the scp command failed
    """
    logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,'
                  'preserve_perm: %s, preserve_symlinks:%s', source, dest,
                  delete_dest, preserve_perm, preserve_symlinks)
    # Start a master SSH connection if necessary.
    self.start_master_ssh()

    if isinstance(source, basestring):
        source = [source]
    dest = os.path.abspath(dest)

    # First choice: rsync. Any CmdError sends us to the scp fallback.
    copied_with_rsync = False
    if self.use_rsync():
        logging.debug('Using Rsync.')
        try:
            remote_source = self._encode_remote_paths(source)
            local_dest = utils.sh_escape(dest)
            rsync = self._make_rsync_cmd([remote_source], local_dest,
                                         delete_dest, preserve_symlinks)
            utils.run(rsync)
        except error.CmdError as e:
            logging.warning("trying scp, rsync failed: %s", e)
        else:
            copied_with_rsync = True

    if not copied_with_rsync:
        logging.debug('Trying scp.')
        # scp has no equivalent to --delete, just drop the entire dest dir
        if delete_dest and os.path.isdir(dest):
            shutil.rmtree(dest)
            os.mkdir(dest)

        remote_source = self._make_rsync_compatible_source(source, False)
        if remote_source:
            # _make_rsync_compatible_source() already did the escaping
            remote_source = self._encode_remote_paths(remote_source,
                                                      escape=False)
            local_dest = utils.sh_escape(dest)
            scp = self._make_scp_cmd([remote_source], local_dest)
            try:
                utils.run(scp)
            except error.CmdError as e:
                logging.debug('scp failed: %s', e)
                raise error.AutoservRunError(e.args[0], e.args[1])

    if not preserve_perm:
        # we have no way to tell scp to not try to preserve the
        # permissions so set them after copy instead.
        # for rsync we could use "--no-p --chmod=ugo=rwX" but those
        # options are only in very recent rsync versions
        self._set_umask_perms(dest)
310
jadmanskica7da372008-10-21 16:26:52 +0000311
def send_file(self, source, dest, delete_dest=False,
              preserve_symlinks=False):
    """
    Copy files from a local path to the remote host.

    Directories are copied recursively. A source directory written with a
    trailing slash contributes only its contents; without the slash the
    directory itself is copied as well (the convention rsync uses).
    rsync is attempted first when use_rsync() allows it, and scp is used
    if rsync is unavailable or its command fails.

    Args:
            source: either
                    1) a single file or directory, as a string
                    2) a list of one or more (possibly mixed)
                            files or directories
            dest: a file or a directory (if source contains a
                    directory or more than one element, you must
                    supply a directory dest)
            delete_dest: if this is true, the command will also clear
                         out any old files at dest that are not in the
                         source
            preserve_symlinks: controls if symlinks on the source will be
                copied as such on the destination or transformed into the
                referenced file/directory

    Raises:
            AutoservRunError: the scp command failed
    """
    logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,'
                  'preserve_symlinks:%s', source, dest,
                  delete_dest, preserve_symlinks)
    # Start a master SSH connection if necessary.
    self.start_master_ssh()

    if isinstance(source, basestring):
        source = [source]
    remote_dest = self._encode_remote_paths([dest])

    # First choice: rsync. Any CmdError sends us to the scp fallback.
    sent_with_rsync = False
    if self.use_rsync():
        logging.debug('Using Rsync.')
        try:
            local_sources = [utils.sh_escape(path) for path in source]
            rsync = self._make_rsync_cmd(local_sources, remote_dest,
                                         delete_dest, preserve_symlinks)
            utils.run(rsync)
        except error.CmdError as e:
            logging.warning("trying scp, rsync failed: %s", e)
        else:
            sent_with_rsync = True

    if sent_with_rsync:
        return

    logging.debug('Trying scp.')
    # scp has no equivalent to --delete, just drop the entire dest dir
    if delete_dest:
        # NOTE(review): dest is interpolated unquoted into the remote
        # shell commands below, so a path containing spaces or shell
        # metacharacters will be misparsed (and one of the commands is
        # "rm -rf"). Quoting it would also stop the remote shell from
        # expanding a leading "~" -- confirm callers before changing.
        probe = self.run("ls -d %s/" % dest, ignore_status=True)
        if probe.exit_status == 0:
            self.run("rm -rf %s && mkdir %s" % (dest, dest))

    local_sources = self._make_rsync_compatible_source(source, True)
    if local_sources:
        scp = self._make_scp_cmd(local_sources, remote_dest)
        try:
            utils.run(scp)
        except error.CmdError as e:
            logging.debug('scp failed: %s', e)
            raise error.AutoservRunError(e.args[0], e.args[1])
383
jadmanskica7da372008-10-21 16:26:52 +0000384
def ssh_ping(self, timeout=60, base_cmd='true'):
    """
    Pings remote host via ssh by running a command on it.

    @param timeout: Time in seconds before giving up; used both as the
                    command timeout and as the connect timeout.
                    Defaults to 60 seconds.
    @param base_cmd: The base command to run with the ssh ping.
                     Defaults to true.
    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors.
    """
    try:
        self.run(base_cmd, timeout=timeout, connect_timeout=timeout)
    except error.AutoservSSHTimeout:
        # Re-raise with a message that names the timeout that was used.
        raise error.AutoservSSHTimeout(
                "Host (ssh) verify timed out (timeout = %d)" % timeout)
    except error.AutoservSshPermissionDeniedError:
        # let AutoservSshPermissionDeniedError be visible to the callers
        raise
    except error.AutoservRunError as e:
        # convert the generic AutoservRunError into something more
        # specific for this context
        details = '\n'.join([e.description, repr(e.result_obj)])
        raise error.AutoservSshPingHostError(details)
jadmanskica7da372008-10-21 16:26:52 +0000411
412
def is_up(self, timeout=60, base_cmd='true'):
    """
    Check if the remote host is up by ssh-ing and running a base command.

    @param timeout: timeout in seconds, forwarded to ssh_ping.
    @param base_cmd: a base command to run with ssh. The default is 'true'.
    @returns True if ssh_ping completed without raising an AutoservError,
             False otherwise.
    """
    try:
        self.ssh_ping(timeout=timeout, base_cmd=base_cmd)
    except error.AutoservError:
        return False
    return True
428
429
def wait_up(self, timeout=None):
    """
    Wait until the remote host is up or the timeout expires.

    The host counts as up once is_up() succeeds (an ssh connection could
    be established) and are_wait_up_processes_up() reports success.

    @param timeout time limit in seconds before returning even
        if the host is not up. None or 0 means wait without a deadline.

    @returns True if the host was found to be up before the timeout expires,
             False otherwise
    """
    current_time = int(time.time())
    end_time = None
    if timeout:
        end_time = current_time + timeout

    while not timeout or current_time < end_time:
        if timeout:
            # Never let a single probe outlive the overall deadline.
            host_is_up = self.is_up(timeout=end_time - current_time)
        else:
            # No deadline was requested, so there is no remaining time to
            # compute; fall back to is_up()'s own per-probe timeout.
            host_is_up = self.is_up()

        if host_is_up:
            try:
                if self.are_wait_up_processes_up():
                    logging.debug('Host %s is now up', self.hostname)
                    return True
            except error.AutoservError:
                # Treat a failed check as "not up yet" and keep polling.
                pass
        time.sleep(1)
        current_time = int(time.time())

    # Only reachable when a timeout was given and has expired.
    logging.debug('Host %s is still down after waiting %d seconds',
                  self.hostname, int(timeout + time.time() - end_time))
    return False
461
462
def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
    """
    Wait until the remote host is down or the timeout expires.

    If old_boot_id is provided, this will wait until either the machine
    is unpingable or self.get_boot_id() returns a value different from
    old_boot_id. If the boot_id value has changed then the function
    returns true under the assumption that the machine has shut down
    and has now already come back up.

    If old_boot_id is None then until the machine becomes unreachable the
    method assumes the machine has not yet shut down.

    Based on this definition, the 4 possible permutations of timeout
    and old_boot_id are:
    1. timeout and old_boot_id: wait timeout seconds for either the
                                host to become unpingable, or the boot id
                                to change. In the latter case we've rebooted
                                and in the former case we've only shutdown,
                                but both cases return True.
    2. only timeout: wait timeout seconds for the host to become unpingable.
                     If the host remains pingable throughout timeout seconds
                     we return False.
    3. only old_boot_id: wait forever until either the host becomes
                         unpingable or the boot_id changes. Return true
                         when either of those conditions are met.
    4. not timeout, not old_boot_id: wait forever till the host becomes
                                     unpingable.

    @param timeout Time limit in seconds before returning even
        if the host is still up.
    @param warning_timer Time limit in seconds that will generate
        a warning if the host is not down yet.
    @param old_boot_id A string containing the result of self.get_boot_id()
        prior to the host being told to shut down. Can be None if this is
        not available.

    @returns True if the host was found to be down, False otherwise
    """
    #TODO: there is currently no way to distinguish between knowing
    #TODO: boot_id was unsupported and not knowing the boot_id.

    # Per-probe timeout used when the caller set no overall deadline
    # (cases 3 and 4 above); same value as the ssh_ping()/is_up() default.
    unbounded_probe_timeout = 60

    current_time = int(time.time())
    end_time = None
    if timeout:
        end_time = current_time + timeout

    if warning_timer:
        warn_time = current_time + warning_timer

    if old_boot_id is not None:
        logging.debug('Host %s pre-shutdown boot_id is %s',
                      self.hostname, old_boot_id)

    # Impose semi real-time deadline constraints, since some clients
    # (eg: watchdog timer tests) expect strict checking of time elapsed.
    # Each iteration of this loop is treated as though it atomically
    # completes within current_time, this is needed because if we used
    # inline time.time() calls instead then the following could happen:
    #
    # while not timeout or time.time() < end_time:      [23 < 30]
    #    some code.                                     [takes 10 secs]
    #    try:
    #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
    #                                                   [30 - 33]
    # The last step will lead to a return True, when in fact the machine
    # went down at 32 seconds (>30). Hence we need to pass get_boot_id
    # the same time that allowed us into that iteration of the loop.
    while not timeout or current_time < end_time:
        if timeout:
            probe_timeout = end_time - current_time
        else:
            # Without a deadline there is no end_time to subtract from;
            # the unguarded subtraction used to raise before any probe.
            probe_timeout = unbounded_probe_timeout

        try:
            new_boot_id = self.get_boot_id(timeout=probe_timeout)
        except error.AutoservError:
            logging.debug('Host %s is now unreachable over ssh, is down',
                          self.hostname)
            return True
        else:
            # if the machine is up but the boot_id value has changed from
            # old boot id, then we can assume the machine has gone down
            # and then already come back up
            if old_boot_id is not None and old_boot_id != new_boot_id:
                logging.debug('Host %s now has boot_id %s and so must '
                              'have rebooted', self.hostname, new_boot_id)
                return True

        if warning_timer and current_time > warn_time:
            self.record("INFO", None, "shutdown",
                        "Shutdown took longer than %ds" % warning_timer)
            # Print the warning only once.
            warning_timer = None
            # If a machine is stuck switching runlevels
            # This may cause the machine to reboot.
            self.run('kill -HUP 1', ignore_status=True)

        time.sleep(1)
        current_time = int(time.time())

    return False
jadmanskif6562912008-10-21 17:59:01 +0000558
mbligha0a27592009-01-24 01:41:36 +0000559
# Tunable constant for the verify & repair code: the amount of disk space
# (SERVER/gb_diskspace_required, default 20.0) that verify_software passes
# to check_diskspace for the autotest install directory.
AUTOTEST_GB_DISKSPACE_REQUIRED = get_value(
        "SERVER", "gb_diskspace_required", type=float, default=20.0)
mbligha0a27592009-01-24 01:41:36 +0000565
jadmanskif6562912008-10-21 17:59:01 +0000566
def verify_connectivity(self):
    """
    Run the parent class connectivity checks, then confirm the host
    answers an ssh ping and is not in the middle of shutting down.

    @raise AutoservHostIsShuttingDownError: If is_shutting_down() is true.
    """
    super(AbstractSSHHost, self).verify_connectivity()

    ping_notice = 'Pinging host ' + self.hostname
    logging.info(ping_notice)
    # ssh_ping() raises on failure, so reaching the next line means success.
    self.ssh_ping()
    logging.info("Host (ssh) %s is alive", self.hostname)

    shutting_down = self.is_shutting_down()
    if shutting_down:
        raise error.AutoservHostIsShuttingDownError("Host is shutting down")
jadmanski80deb752009-01-21 17:14:16 +0000576
mblighb49b5232009-02-12 21:54:49 +0000577
def verify_software(self):
    """
    Run the parent class software checks, then make sure the autotest
    install directory has AUTOTEST_GB_DISKSPACE_REQUIRED of free space.

    @raise AutoservHostError: Propagated unchanged from check_diskspace.
    """
    super(AbstractSSHHost, self).verify_software()
    try:
        install_dir = autotest.Autotest.get_install_dir(self)
        self.check_diskspace(install_dir,
                             self.AUTOTEST_GB_DISKSPACE_REQUIRED)
    except error.AutoservHostError:
        raise  # only want to raise if it's a space issue
    except autotest.AutodirNotFoundError:
        # autotest dir may not exist, etc. ignore
        ignored_trace = traceback.format_exc()
        logging.debug('autodir space check exception, this is probably '
                      'safe to ignore\n' + ignored_trace)
mblighefccc1b2010-01-11 19:08:42 +0000589
590
def close(self):
    """
    Release the resources held by this host object: run the parent class
    close(), tear down any master SSH connection and delete the temporary
    known_hosts file.
    """
    super(AbstractSSHHost, self).close()
    self._cleanup_master_ssh()
    # Only delete the file if it is still there, so that calling close()
    # again (or after the file was removed externally) does not raise.
    if os.path.exists(self.known_hosts_file):
        os.remove(self.known_hosts_file)
mblighefccc1b2010-01-11 19:08:42 +0000595
596
597 def _cleanup_master_ssh(self):
598 """
599 Release all resources (process, temporary directory) used by an active
600 master SSH connection.
601 """
602 # If a master SSH connection is running, kill it.
603 if self.master_ssh_job is not None:
Aviv Keshet46250752013-08-27 15:52:06 -0700604 logging.debug('Nuking master_ssh_job.')
mblighefccc1b2010-01-11 19:08:42 +0000605 utils.nuke_subprocess(self.master_ssh_job.sp)
606 self.master_ssh_job = None
607
608 # Remove the temporary directory for the master SSH socket.
609 if self.master_ssh_tempdir is not None:
Aviv Keshet46250752013-08-27 15:52:06 -0700610 logging.debug('Cleaning master_ssh_tempdir.')
mblighefccc1b2010-01-11 19:08:42 +0000611 self.master_ssh_tempdir.clean()
612 self.master_ssh_tempdir = None
613 self.master_ssh_option = ''
614
615
def start_master_ssh(self, timeout=5):
    """
    Called whenever a slave SSH connection needs to be initiated (e.g., by
    run, rsync, scp). If master SSH support is enabled and a master SSH
    connection is not active already, start a new one in the background.
    Also, cleanup any zombie master SSH connections (e.g., dead due to
    reboot).

    timeout: timeout in seconds (default 5) to wait for master ssh
             connection to be established. If timeout is reached, a
             warning message is logged, but no other action is taken.
    """
    if not enable_master_ssh:
        return

    # If a previously started master SSH connection is not running
    # anymore, it needs to be cleaned up and then restarted.
    if self.master_ssh_job is not None:
        old_socket = os.path.join(self.master_ssh_tempdir.name, 'socket')
        if (not os.path.exists(old_socket) or
                self.master_ssh_job.sp.poll() is not None):
            if self.master_ssh_job.sp.poll() is None:
                # The process is alive but its socket vanished: collect
                # diagnostics about the process before killing it.
                # NOTE(review): nesting of the ps/lsof logging under this
                # branch is reconstructed -- confirm against the repo.
                logging.warning('Master ssh connection socket file '
                                'was missing while its subprocess was '
                                'still running.')
                if os.path.exists(self.master_ssh_tempdir.name):
                    logging.warning('However, the socket file temporary '
                                    'directory still exists.')

                logging.warning('Info on defunct master ssh ps below.')
                master_pid = str(self.master_ssh_job.sp.pid)
                ps_output = utils.run(['ps', '-Fww', master_pid],
                                      ignore_status=True).stdout
                logging.warning('Master ssh connection ps info: %s',
                                ps_output)
                lsof_output = utils.run(['lsof', '-p', master_pid],
                                        ignore_status=True).stdout
                logging.warning('Master ssh connection lsof info: %s',
                                lsof_output)

            logging.info("Master ssh connection to %s is down.",
                         self.hostname)
            self._cleanup_master_ssh()

    # Nothing more to do if a (still healthy) master connection exists.
    if self.master_ssh_job is not None:
        return

    # Start a new master SSH connection.
    # Create a shared socket in a temp location.
    self.master_ssh_tempdir = autotemp.tempdir(unique_id='ssh-master')
    self.master_ssh_option = ("-o ControlPath=%s/socket" %
                              self.master_ssh_tempdir.name)

    # Start the master SSH connection in the background.
    master_cmd = self.ssh_command(options="-N -o ControlMaster=yes")
    logging.debug("System load: %s", utils.run(['uptime']).stdout)
    logging.info("Starting master ssh connection '%s'", master_cmd)
    self.master_ssh_job = utils.BgJob(master_cmd,
                                      nickname='master-ssh',
                                      no_pipes=True)

    # To prevent a race between the master ssh connection startup and its
    # first attempted use, wait for socket file to exist before returning.
    deadline = time.time() + timeout
    new_socket = os.path.join(self.master_ssh_tempdir.name, 'socket')
    while time.time() < deadline:
        if os.path.exists(new_socket):
            break
        time.sleep(.2)
    else:
        # The loop ran out of time without ever seeing the socket file.
        logging.warning('Timed out waiting for master-ssh connection '
                        'to be established.')
mbligh0a883702010-04-21 01:58:34 +0000687
688
def clear_known_hosts(self):
    """Clears out the temporary ssh known_hosts file.

    This is useful if the test SSHes to the machine, then reinstalls it,
    then SSHes to it again. It can be called after the reinstall to
    reduce the spam in the logs.
    """
    logging.info("Clearing known hosts for host '%s', file '%s'.",
                 self.hostname, self.known_hosts_file)
    # Opening the file for writing truncates it; nothing is written.
    with open(self.known_hosts_file, "w"):
        pass