# Source blob: 3c45a6bc9496bac4c612dc4a7b976dfc9e5f4b4b
import glob
import logging
import os
import shutil
import socket
import tempfile
import time
import traceback
import types

from autotest_lib.client.common_lib import autotemp, error, logging_manager
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import remote


# Shorthand for reading a value out of the global configuration.
get_value = global_config.get_config_value

# When True, start_master_ssh() keeps a background master SSH connection
# open so later ssh/scp/rsync invocations can share its control socket.
# Read from the AUTOSERV section; defaults to False.
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)


class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """

    def _initialize(self, hostname, user="root", port=22, password="",
                    *args, **dargs):
        """
        Set up the ssh-related state of the host object.

        @param hostname: name of the remote host; resolved to self.ip.
        @param user: login name used for ssh/scp/rsync.
        @param port: ssh port on the remote host.
        @param password: stored on the instance as self.password.
        """
        super(AbstractSSHHost, self)._initialize(hostname=hostname,
                                                 *args, **dargs)
        self.ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
        self.user = user
        self.port = port
        self.password = password
        # None means "not probed yet"; see use_rsync().
        self._use_rsync = None

        # Private known_hosts file passed to ssh/scp/rsync through
        # UserKnownHostsFile. mkstemp() hands back an open descriptor as
        # well as the path; only the path is needed, so close the
        # descriptor right away instead of leaking it.
        known_hosts_fd, self.known_hosts_file = tempfile.mkstemp()
        os.close(known_hosts_fd)

        # Master SSH connection background job, socket temp directory and
        # socket control path option. If master-SSH is enabled, these fields
        # will be initialized by start_master_ssh when a new SSH connection
        # is initiated.
        self.master_ssh_job = None
        self.master_ssh_tempdir = None
        self.master_ssh_option = ''


    def make_ssh_command(self, user="root", port=22, opts='',
                         hosts_file='/dev/null',
                         connect_timeout=30, alive_interval=300):
        """
        Build the base ssh command line (without host or remote command).

        @param user: login name passed to ssh via -l.
        @param port: port passed to ssh via -p.
        @param opts: extra ssh options inserted verbatim.
        @param hosts_file: path used for UserKnownHostsFile.
        @param connect_timeout: ConnectTimeout in seconds; must be a
                positive integer.
        @param alive_interval: ServerAliveInterval in seconds.

        @returns the ssh command prefix as a string.
        """
        base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no "
                        "-o UserKnownHostsFile=%s -o BatchMode=yes "
                        "-o ConnectTimeout=%d -o ServerAliveInterval=%d "
                        "-l %s -p %d")
        assert isinstance(connect_timeout, (int, long))
        assert connect_timeout > 0 # can't disable the timeout
        return base_command % (opts, hosts_file, connect_timeout,
                               alive_interval, user, port)


    def use_rsync(self):
        """
        Tell whether rsync should be used for file transfers.

        The remote host is probed once; the answer is cached in
        self._use_rsync for all later calls.

        @returns True if rsync is available on the remote host.
        """
        if self._use_rsync is not None:
            return self._use_rsync

        # Check if rsync is available on the remote host. If it's not,
        # don't try to use it for any future file transfers.
        self._use_rsync = self._check_rsync()
        if not self._use_rsync:
            logging.warning("rsync not available on remote host %s -- "
                            "disabled", self.hostname)
        return self._use_rsync


    def _check_rsync(self):
        """
        Check if rsync is available on the remote host.
        """
        try:
            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
        except error.AutoservRunError:
            return False
        return True


    def _encode_remote_paths(self, paths, escape=True):
        """
        Given a list of file paths, encodes it as a single remote path, in
        the style used by rsync and scp.

        @param paths: list of remote paths.
        @param escape: if True, each path is passed through
                utils.scp_remote_escape first; pass False when the paths
                are already escaped.
        """
        if escape:
            paths = [utils.scp_remote_escape(path) for path in paths]
        return '%s@%s:"%s"' % (self.user, self.hostname, " ".join(paths))


    def _make_rsync_cmd(self, sources, dest, delete_dest, preserve_symlinks):
        """
        Given a list of source paths and a destination path, produces the
        appropriate rsync command for copying them. Remote paths must be
        pre-encoded.

        @param delete_dest: if True, pass --delete to rsync.
        @param preserve_symlinks: if False, pass -L so symlinks are
                copied as the files/directories they point to.
        """
        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                        opts=self.master_ssh_option,
                                        hosts_file=self.known_hosts_file)
        if delete_dest:
            delete_flag = "--delete"
        else:
            delete_flag = ""
        if preserve_symlinks:
            symlink_flag = ""
        else:
            symlink_flag = "-L"
        command = "rsync %s %s --timeout=1800 --rsh='%s' -az %s %s"
        return command % (symlink_flag, delete_flag, ssh_cmd,
                          " ".join(sources), dest)


    def _make_ssh_cmd(self, cmd):
        """
        Create a base ssh command string for the host which can be used
        to run commands directly on the machine
        """
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=self.master_ssh_option,
                                         hosts_file=self.known_hosts_file)

        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))


    def _make_scp_cmd(self, sources, dest):
        """
        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded.
        """
        command = ("scp -rq %s -o StrictHostKeyChecking=no "
                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
        return command % (self.master_ssh_option, self.known_hosts_file,
                          self.port, " ".join(sources), dest)


    def _make_rsync_compatible_globs(self, path, is_local):
        """
        Given an rsync-style path, returns a list of globbed paths
        that will hopefully provide equivalent behaviour for scp. Does not
        support the full range of rsync pattern matching behaviour, only that
        exposed in the get/send_file interface (trailing slashes).

        The is_local param is flag indicating if the paths should be
        interpreted as local or remote paths.
        """

        # non-trailing slash paths should just work
        if not path.endswith("/"):
            return [path]

        # make a function to test if a pattern matches any files
        if is_local:
            def glob_matches_files(path, pattern):
                return len(glob.glob(path + pattern)) > 0
        else:
            def glob_matches_files(path, pattern):
                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
                                                   pattern),
                                  stdout_tee=None, ignore_status=True)
                return result.exit_status == 0

        # take a set of globs that cover all files, and see which are needed
        patterns = ["*", ".[!.]*"]
        patterns = [p for p in patterns if glob_matches_files(path, p)]

        # convert them into a set of paths suitable for the commandline
        if is_local:
            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
                    for pattern in patterns]
        else:
            return [utils.scp_remote_escape(path) + pattern
                    for pattern in patterns]


    def _make_rsync_compatible_source(self, source, is_local):
        """
        Applies the same logic as _make_rsync_compatible_globs, but
        applies it to an entire list of sources, producing a new list of
        sources, properly quoted.
        """
        compatible = []
        for path in source:
            compatible.extend(
                    self._make_rsync_compatible_globs(path, is_local))
        return compatible


    def _set_umask_perms(self, dest):
        """
        Given a destination file/dir (recursively) set the permissions on
        all the files and directories to the max allowed by running umask.
        """

        # now this looks strange but I haven't found a way in Python to _just_
        # get the umask, apparently the only option is to try to set it
        umask = os.umask(0)
        os.umask(umask)

        max_privs = 0o777 & ~umask

        def set_file_privs(filename):
            """Sets mode of |filename|.  Assumes |filename| exists."""
            file_stat = os.stat(filename)

            file_privs = max_privs
            # if the original file permissions do not have at least one
            # executable bit then do not set it anywhere
            if not file_stat.st_mode & 0o111:
                file_privs &= ~0o111

            os.chmod(filename, file_privs)

        # try a bottom-up walk so changes on directory permissions won't cut
        # our access to the files/directories inside it
        for root, dirs, files in os.walk(dest, topdown=False):
            # when setting the privileges we emulate the chmod "X" behaviour
            # that sets to execute only if it is a directory or any of the
            # owner/group/other already has execute right
            for dirname in dirs:
                os.chmod(os.path.join(root, dirname), max_privs)

            # Filter out broken symlinks as we go. The existence test has
            # to be made on the full path: a bare name from os.walk would
            # be resolved against the current working directory.
            for filename in files:
                full_path = os.path.join(root, filename)
                if os.path.exists(full_path):
                    set_file_privs(full_path)


        # now set privs for the dest itself
        if os.path.isdir(dest):
            os.chmod(dest, max_privs)
        else:
            set_file_privs(dest)


    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
                 preserve_symlinks=False):
        """
        Copy files from the remote host to a local path.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_perm: tells get_file() to try to preserve the sources
                               permissions on files and dirs
                preserve_symlinks: try to preserve symlinks instead of
                                   transforming them into files/dirs on copy

        Raises:
                AutoservRunError: the scp command failed
        """

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]
        dest = os.path.abspath(dest)

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            try:
                remote_source = self._encode_remote_paths(source)
                local_dest = utils.sh_escape(dest)
                rsync = self._make_rsync_cmd([remote_source], local_dest,
                                             delete_dest, preserve_symlinks)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                logging.warning("trying scp, rsync failed: %s", e)

        if try_scp:
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest and os.path.isdir(dest):
                shutil.rmtree(dest)
                os.mkdir(dest)

            remote_source = self._make_rsync_compatible_source(source, False)
            if remote_source:
                # _make_rsync_compatible_source() already did the escaping
                remote_source = self._encode_remote_paths(remote_source,
                                                          escape=False)
                local_dest = utils.sh_escape(dest)
                scp = self._make_scp_cmd([remote_source], local_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    raise error.AutoservRunError(e.args[0], e.args[1])

        if not preserve_perm:
            # we have no way to tell scp to not try to preserve the
            # permissions so set them after copy instead.
            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
            # options are only in very recent rsync versions
            self._set_umask_perms(dest)


    def send_file(self, source, dest, delete_dest=False,
                  preserve_symlinks=False):
        """
        Copy files from a local path to the remote host.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_symlinks: controls if symlinks on the source will be
                    copied as such on the destination or transformed into the
                    referenced file/directory

        Raises:
                AutoservRunError: the scp command failed
        """

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]
        remote_dest = self._encode_remote_paths([dest])

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            try:
                local_sources = [utils.sh_escape(path) for path in source]
                rsync = self._make_rsync_cmd(local_sources, remote_dest,
                                             delete_dest, preserve_symlinks)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                logging.warning("trying scp, rsync failed: %s", e)

        if try_scp:
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest:
                # Quote the path so that whitespace or shell
                # metacharacters in dest cannot split the argument or
                # make "rm -rf" act on something other than dest.
                quoted_dest = '"%s"' % utils.sh_escape(dest)
                is_dir = self.run("ls -d %s/" % quoted_dest,
                                  ignore_status=True).exit_status == 0
                if is_dir:
                    cmd = "rm -rf %s && mkdir %s"
                    cmd %= (quoted_dest, quoted_dest)
                    self.run(cmd)

            local_sources = self._make_rsync_compatible_source(source, True)
            if local_sources:
                scp = self._make_scp_cmd(local_sources, remote_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    raise error.AutoservRunError(e.args[0], e.args[1])


    def ssh_ping(self, timeout=60):
        """
        Pings remote host via ssh.

        @param timeout: Time in seconds before giving up.
                        Defaults to 60 seconds.
        @raise AutoservSSHTimeout: If the ssh ping times out.
        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                                 permissions.
        @raise AutoservSshPingHostError: For other AutoservRunErrors.
        """
        try:
            self.run("true", timeout=timeout, connect_timeout=timeout)
        except error.AutoservSSHTimeout:
            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
            raise error.AutoservSSHTimeout(msg)
        except error.AutoservSshPermissionDeniedError:
            #let AutoservSshPermissionDeniedError be visible to the callers
            raise
        except error.AutoservRunError as e:
            # convert the generic AutoservRunError into something more
            # specific for this context
            raise error.AutoservSshPingHostError(e.description + '\n' +
                                                 repr(e.result_obj))


    def is_up(self, timeout=60):
        """
        Check if the remote host is up.

        @param timeout: timeout in seconds.
        @returns True if the remote host is up before the timeout expires,
                 False otherwise.
        """
        try:
            self.ssh_ping(timeout=timeout)
        except error.AutoservError:
            return False
        else:
            return True


    def wait_up(self, timeout=None):
        """
        Wait until the remote host is up or the timeout expires.

        In fact, it will wait until an ssh connection to the remote
        host can be established, and getty is running.

        @param timeout time limit in seconds before returning even
            if the host is not up. A false value (None or 0) means
            wait forever.

        @returns True if the host was found to be up before the timeout expires,
                 False otherwise
        """
        start_time = time.time()
        current_time = start_time
        # end_time stays None when there is no deadline, so that it is
        # never used in arithmetic in the "wait forever" case.
        if timeout:
            end_time = start_time + timeout
        else:
            end_time = None

        while end_time is None or current_time < end_time:
            if end_time is None:
                # No overall deadline: use is_up()'s default per-ping
                # timeout.
                host_is_up = self.is_up()
            else:
                host_is_up = self.is_up(timeout=end_time - current_time)

            if host_is_up:
                try:
                    if self.are_wait_up_processes_up():
                        logging.debug('Host %s is now up', self.hostname)
                        return True
                except error.AutoservError:
                    pass
            time.sleep(1)
            current_time = time.time()

        logging.debug('Host %s is still down after waiting %d seconds',
                      self.hostname, int(time.time() - start_time))
        return False


    def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
        """
        Wait until the remote host is down or the timeout expires.

        If old_boot_id is provided, this will wait until either the machine
        is unpingable or self.get_boot_id() returns a value different from
        old_boot_id. If the boot_id value has changed then the function
        returns true under the assumption that the machine has shut down
        and has now already come back up.

        If old_boot_id is None then until the machine becomes unreachable the
        method assumes the machine has not yet shut down.

        Based on this definition, the 4 possible permutations of timeout
        and old_boot_id are:
        1. timeout and old_boot_id: wait timeout seconds for either the
                                    host to become unpingable, or the boot id
                                    to change. In the latter case we've rebooted
                                    and in the former case we've only shutdown,
                                    but both cases return True.
        2. only timeout: wait timeout seconds for the host to become unpingable.
                         If the host remains pingable throughout timeout seconds
                         we return False.
        3. only old_boot_id: wait forever until either the host becomes
                             unpingable or the boot_id changes. Return true
                             when either of those conditions are met.
        4. not timeout, not old_boot_id: wait forever till the host becomes
                                         unpingable.

        @param timeout Time limit in seconds before returning even
            if the host is still up.
        @param warning_timer Time limit in seconds that will generate
            a warning if the host is not down yet.
        @param old_boot_id A string containing the result of self.get_boot_id()
            prior to the host being told to shut down. Can be None if this is
            not available.

        @returns True if the host was found to be down, False otherwise
        """
        #TODO: there is currently no way to distinguish between knowing
        #TODO: boot_id was unsupported and not knowing the boot_id.
        current_time = time.time()
        # end_time stays None when there is no deadline (cases 3 and 4
        # above), so that it is never used in arithmetic.
        if timeout:
            end_time = current_time + timeout
        else:
            end_time = None

        if warning_timer:
            warn_time = current_time + warning_timer

        if old_boot_id is not None:
            logging.debug('Host %s pre-shutdown boot_id is %s',
                          self.hostname, old_boot_id)

        # Impose semi real-time deadline constraints, since some clients
        # (eg: watchdog timer tests) expect strict checking of time elapsed.
        # Each iteration of this loop is treated as though it atomically
        # completes within current_time, this is needed because if we used
        # inline time.time() calls instead then the following could happen:
        #
        # while not timeout or time.time() < end_time:      [23 < 30]
        #    some code.                                     [takes 10 secs]
        #    try:
        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
        #                                                   [30 - 33]
        # The last step will lead to a return True, when in fact the machine
        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
        # the same time that allowed us into that iteration of the loop.
        while end_time is None or current_time < end_time:
            try:
                if end_time is None:
                    # No overall deadline: let get_boot_id() use its own
                    # default timeout.
                    # NOTE(review): assumes get_boot_id's timeout argument
                    # is optional -- confirm against its definition.
                    new_boot_id = self.get_boot_id()
                else:
                    new_boot_id = self.get_boot_id(
                            timeout=end_time - current_time)
            except error.AutoservError:
                logging.debug('Host %s is now unreachable over ssh, is down',
                              self.hostname)
                return True
            else:
                # if the machine is up but the boot_id value has changed from
                # old boot id, then we can assume the machine has gone down
                # and then already come back up
                if old_boot_id is not None and old_boot_id != new_boot_id:
                    logging.debug('Host %s now has boot_id %s and so must '
                                  'have rebooted', self.hostname, new_boot_id)
                    return True

            if warning_timer and current_time > warn_time:
                self.record("WARN", None, "shutdown",
                            "Shutdown took longer than %ds" % warning_timer)
                # Print the warning only once.
                warning_timer = None
                # If a machine is stuck switching runlevels
                # This may cause the machine to reboot.
                self.run('kill -HUP 1', ignore_status=True)

            time.sleep(1)
            current_time = time.time()

        return False


    # tunable constants for the verify & repair code
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)


    def verify_connectivity(self):
        """
        Verify that the host answers an ssh ping and is not shutting down.

        @raise AutoservHostIsShuttingDownError: if is_shutting_down()
                reports that the host is going down.
        """
        super(AbstractSSHHost, self).verify_connectivity()

        logging.info('Pinging host %s', self.hostname)
        self.ssh_ping()
        logging.info("Host (ssh) %s is alive", self.hostname)

        if self.is_shutting_down():
            raise error.AutoservHostIsShuttingDownError("Host is shutting down")


    def verify_software(self):
        """
        Check that the autotest install directory has enough free space.

        @raise AutoservHostError: propagated from check_diskspace().
        """
        super(AbstractSSHHost, self).verify_software()
        try:
            self.check_diskspace(autotest.Autotest.get_install_dir(self),
                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
        except error.AutoservHostError:
            raise           # only want to raise if it's a space issue
        except autotest.AutodirNotFoundError:
            # autotest dir may not exist, etc. ignore
            logging.debug('autodir space check exception, this is probably '
                          'safe to ignore\n%s', traceback.format_exc())


    def close(self):
        """
        Release the master SSH connection and the temporary known_hosts
        file. Safe to call more than once.
        """
        super(AbstractSSHHost, self).close()
        self._cleanup_master_ssh()
        # The file is already gone after a previous close(); do not fail
        # in that case.
        if os.path.exists(self.known_hosts_file):
            os.remove(self.known_hosts_file)


    def _cleanup_master_ssh(self):
        """
        Release all resources (process, temporary directory) used by an active
        master SSH connection.
        """
        # If a master SSH connection is running, kill it.
        if self.master_ssh_job is not None:
            utils.nuke_subprocess(self.master_ssh_job.sp)
            self.master_ssh_job = None

        # Remove the temporary directory for the master SSH socket.
        if self.master_ssh_tempdir is not None:
            self.master_ssh_tempdir.clean()
            self.master_ssh_tempdir = None
            self.master_ssh_option = ''


    def start_master_ssh(self):
        """
        Called whenever a slave SSH connection needs to be initiated (e.g., by
        run, rsync, scp). If master SSH support is enabled and a master SSH
        connection is not active already, start a new one in the background.
        Also, cleanup any zombie master SSH connections (e.g., dead due to
        reboot).
        """
        if not enable_master_ssh:
            return

        # If a previously started master SSH connection is not running
        # anymore, it needs to be cleaned up and then restarted.
        if self.master_ssh_job is not None:
            if self.master_ssh_job.sp.poll() is not None:
                logging.info("Master ssh connection to %s is down.",
                             self.hostname)
                self._cleanup_master_ssh()

        # Start a new master SSH connection.
        if self.master_ssh_job is None:
            # Create a shared socket in a temp location.
            self.master_ssh_tempdir = autotemp.tempdir(unique_id='ssh-master')
            self.master_ssh_option = ("-o ControlPath=%s/socket" %
                                      self.master_ssh_tempdir.name)

            # Start the master SSH connection in the background.
            master_cmd = self.ssh_command(options="-N -o ControlMaster=yes")
            logging.info("Starting master ssh connection '%s'", master_cmd)
            self.master_ssh_job = utils.BgJob(master_cmd)


    def clear_known_hosts(self):
        """Clears out the temporary ssh known_hosts file.

        This is useful if the test SSHes to the machine, then reinstalls it,
        then SSHes to it again.  It can be called after the reinstall to
        reduce the spam in the logs.
        """
        logging.info("Clearing known hosts for host '%s', file '%s'.",
                     self.hostname, self.known_hosts_file)
        # Clear out the file by opening it for writing and then closing.
        with open(self.known_hosts_file, "w"):
            pass