blob: 8296ca3ba34a9a4fc57004de710aad538fd03acf [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
showardb18134f2009-03-20 20:52:18 +000010import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
jadmanskic09fc152008-10-15 17:56:59 +000011from autotest_lib.client.bin import fd_stack, sysinfo
mbligh09108442008-10-15 16:27:38 +000012from autotest_lib.client.common_lib import error, log, utils, packages
jadmanski043e1132008-11-19 17:10:32 +000013from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000014from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000015
16
mbligh084bc172008-10-18 14:02:45 +000017def _control_segment_path(name):
18 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000019 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000020 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000021
22
# Filenames used for control files inside a job's working directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Canned control segments implementing the standard job phases.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000035
36
mbligh062ed152009-01-13 00:57:14 +000037# by default provide a stub that generates no site data
38def _get_site_job_data_dummy(job):
39 return {}
40
41
# Load up site-specific code for generating site-specific job data; falls
# back to the no-op stub above when no site implementation exists.
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000046
47
class base_server_job(object):
    """
    The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
            serverdir
                    <autodir>/server/
            clientdir
                    <autodir>/client/
            conmuxdir
                    <autodir>/conmux/
            testdir
                    <autodir>/server/tests/
            site_testdir
                    <autodir>/server/site_tests/
            control
                    the control file for this job
            drop_caches_between_iterations
                    drop the pagecache between each iteration
    """

    # Version of the status log format this job writes.
    STATUS_VERSION = 1
    # Seconds slept around enabling/disabling warnings so that console
    # warnings racing with the state change land on the intended side.
    WARNING_DISABLE_DELAY = 5
jadmanski10646442008-08-13 14:05:21 +000073
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
                control: The control file (pathname of)
                args: args to pass to the control file
                resultdir: where to throw the results
                label: label for the job
                user: Username for the job (email address)
                machines: list of machine hostnames the job runs against
                client: True if a client-side control file
                parse_job: job tag used by the continuous results parser
                ssh_user/ssh_port/ssh_pass: ssh credentials for the machines
        """
        # derive the standard autotest directory layout from our location
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            # control is a pathname; read it into memory
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        self.uncollected_log_file = None
        if resultdir:
            # tracks client logs that still need to be fetched after a crash
            self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
            self.debugdir = os.path.join(resultdir, 'debug')

            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            if not os.path.exists(self.uncollected_log_file):
                # seed the uncollected-logs file with an empty pickled list
                log_file = open(self.uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
            # remove any stale status log from a previous run
            status_log = self.get_status_log_path()
            if os.path.exists(status_log):
                os.remove(status_log)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        self.record_prefix = ''
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches_between_iterations = False

        # capture stdout/stderr through fd stacks so output can be redirected
        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # ensure we have a writable tmpdir; fall back to a per-user dir
        # under the system temp directory if the default is unusable
        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                # If the problem was something other than the
                # directory already existing, this chmod should throw as well
                # exception.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        # record basic job metadata as keyvals in the results directory
        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            # site-specific hook may contribute extra keyvals
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self.parse_job = parse_job
        # continuous parsing only works for single-machine jobs
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                              run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()
182
183
jadmanskie432dd22009-01-30 15:04:51 +0000184 @staticmethod
185 def _load_control_file(path):
186 f = open(path)
187 try:
188 control_file = f.read()
189 finally:
190 f.close()
191 return re.sub('\r', '', control_file)
192
193
jadmanski550fdc22008-11-20 16:32:08 +0000194 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000195 """
196 Register some hooks into the subcommand modules that allow us
197 to properly clean up self.hosts created in forked subprocesses.
198 """
jadmanski550fdc22008-11-20 16:32:08 +0000199 def on_fork(cmd):
200 self._existing_hosts_on_fork = set(self.hosts)
201 def on_join(cmd):
202 new_hosts = self.hosts - self._existing_hosts_on_fork
203 for host in new_hosts:
204 host.close()
205 subcommand.subcommand.register_fork_hook(on_fork)
206 subcommand.subcommand.register_join_hook(on_join)
207
jadmanski10646442008-08-13 14:05:21 +0000208
    def init_parser(self, resultdir):
        """
        Start the continuous parsing of resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        # redirect parser debugging to .parse.log (opened unbuffered)
        parse_log = os.path.join(resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self.STATUS_VERSION)
        self.job_model = self.parser.make_job(resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self.parse_job)
        if job_idx is None:
            self.results_db.insert_job(self.parse_job, self.job_model)
        else:
            # reuse the existing job row: just attach its indices
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
233
234
235 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000236 """
237 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000238 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000239 remaining test results to the results db)
240 """
jadmanski10646442008-08-13 14:05:21 +0000241 if not self.using_parser:
242 return
243 final_tests = self.parser.end()
244 for test in final_tests:
245 self.__insert_test(test)
246 self.using_parser = False
247
248
    def verify(self):
        """Run the verify control segment against all job machines.

        Records an ABORT in the status log and re-raises if verification
        fails.  Raises AutoservError if the job has no machines.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            # namespace exposed to the verify control segment
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self.ssh_user,
                         'ssh_port' : self.ssh_port,
                         'ssh_pass' : self.ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise
264
265
266 def repair(self, host_protection):
267 if not self.machines:
268 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000269 if self.resultdir:
270 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000271 namespace = {'machines': self.machines, 'job': self,
272 'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
273 'ssh_pass': self.ssh_pass,
274 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000275
mbligh0931b0a2009-04-08 17:44:48 +0000276 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000277
278
279 def precheck(self):
280 """
281 perform any additional checks in derived classes.
282 """
283 pass
284
285
286 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000287 """
288 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000289 """
290 pass
291
292
293 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000294 """
295 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000296 """
297 pass
298
299
jadmanski23afbec2008-09-17 18:12:07 +0000300 def enable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000301 """
302 By default tests run test.cleanup
303 """
jadmanski23afbec2008-09-17 18:12:07 +0000304 self.run_test_cleanup = True
305
306
307 def disable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000308 """
309 By default tests do not run test.cleanup
310 """
jadmanski23afbec2008-09-17 18:12:07 +0000311 self.run_test_cleanup = False
312
313
jadmanski10646442008-08-13 14:05:21 +0000314 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000315 """
316 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000317 """
318 return False
319
320
    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.
        """
        # forking happens unless this is a single-machine job running on
        # exactly its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            def wrapper(machine):
                # runs in the forked child: narrow this job object down to
                # one machine and start a per-machine parser
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # runs in the forked child: redirect results into a
                # per-machine subdirectory (no parsing)
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)
352
353
    # sentinel value for control_file_dir: write control files to a
    # throwaway temporary directory
    USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Run the job's control file against its machines.

        @param cleanup - run the cleanup control segment afterwards
        @param install_before - run the install segment before the job
        @param install_after - run the install segment after the job
        @param collect_crashdumps - gather crashdumps when the job is done
        @param namespace - extra names exposed to the control file
        @param control - control file text; defaults to self.control
        @param control_file_dir - where to write control file copies, or
                USE_TEMP_DIR for a temporary directory
        @param only_collect_crashinfo - skip running the control file and
                only collect crash information
        """
        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            control = self.control
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self.ssh_user
        namespace['ssh_port'] = self.ssh_port
        namespace['ssh_pass'] = self.ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'w').close()
            self.enable_external_logging()

        # assume crashinfo collection is needed until the control file
        # finishes without error
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            if install_before and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)

            if not only_collect_crashinfo:
                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self.USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   SERVER_CONTROL_FILENAME)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self.client:
                    # client-side control: wrap it with the client_wrapper
                    # segment, which runs the control file on the client
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
                                server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                self._execute_code(server_control_file, namespace)

            # no error occured, so we don't need to collect crashinfo
            collect_crashinfo = False
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    print 'Error %s removing dir %s' % (e,
                                                        temp_control_file_dir)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self.uncollected_log_file:
                os.remove(self.uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000435
436
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True if the test ran successfully, False if it failed with
        a test exception; any other exception is re-raised.
        """

        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        # refuse to overwrite the results of a previous run of this test
        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            # runs the test and records its outcome in the status log
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            # test failures are reported through the return value
            return False
        elif exc_info:
            # non-test exceptions propagate with their original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000481
482
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START/END around the call.  Returns (result, exc_info)
        where exc_info is sys.exc_info() if a TestBaseException occurred,
        else None.  Any other exception is converted to a JobError.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            # indent everything recorded inside the group
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            # test exceptions end the group but are reported to the caller
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            # anything else aborts the group and becomes a JobError
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000508
509
510 def run_group(self, function, *args, **dargs):
511 """\
512 function:
513 subroutine to run
514 *args:
515 arguments for the function
516 """
517
518 name = function.__name__
519
520 # Allow the tag for the group to be specified.
521 tag = dargs.pop('tag', None)
522 if tag:
523 name = tag
524
jadmanskide292df2008-08-26 20:51:14 +0000525 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000526
527
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            # indent everything recorded during the reboot
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # only query the kernel version after a successful reboot
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
555
556
jadmanskie432dd22009-01-30 15:04:51 +0000557 def run_control(self, path):
558 """Execute a control file found at path (relative to the autotest
559 path). Intended for executing a control file within a control file,
560 not for running the top-level job control file."""
561 path = os.path.join(self.autodir, path)
562 control_file = self._load_control_file(path)
563 self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)
564
565
jadmanskic09fc152008-10-15 17:56:59 +0000566 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000567 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000568 on_every_test)
569
570
571 def add_sysinfo_logfile(self, file, on_every_test=False):
572 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
573
574
575 def _add_sysinfo_loggable(self, loggable, on_every_test):
576 if on_every_test:
577 self.sysinfo.test_loggables.add(loggable)
578 else:
579 self.sysinfo.boot_loggables.add(loggable)
580
581
    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping, and is
        governed by self.record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self.record_prefix
        try:
            # warnings arriving with an END line belong inside the group
            # being ended, so indent them one extra level
            if status_code.startswith("END "):
                self.record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self.record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)
629
630
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; lines are
                # <timestamp>\t<msgtype>\t<message>
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
668
669
jadmanski16a7ff72009-04-01 18:19:53 +0000670 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000671 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000672 self.record("INFO", None, None,
673 "disabling %s warnings" % warning_type,
674 {"warnings.disable": warning_type})
675 time.sleep(self.WARNING_DISABLE_DELAY)
jadmanskif37df842009-02-11 00:03:26 +0000676
677
jadmanski16a7ff72009-04-01 18:19:53 +0000678 def enable_warnings(self, warning_type):
679 time.sleep(self.WARNING_DISABLE_DELAY)
jadmanskif37df842009-02-11 00:03:26 +0000680 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000681 self.record("INFO", None, None,
682 "enabling %s warnings" % warning_type,
683 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000684
685
jadmanski779bd292009-03-19 17:33:33 +0000686 def get_status_log_path(self, subdir=None):
687 """Return the path to the job status log.
688
689 @param subdir - Optional paramter indicating that you want the path
690 to a subdirectory status log.
691
692 @returns The path where the status log should be.
693 """
mbligh210bae62009-04-01 18:33:13 +0000694 if self.resultdir:
695 if subdir:
696 return os.path.join(self.resultdir, subdir, "status.log")
697 else:
698 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000699 else:
mbligh210bae62009-04-01 18:33:13 +0000700 return None
jadmanski779bd292009-03-19 17:33:33 +0000701
702
jadmanski6bb32d72009-03-19 20:25:24 +0000703 def _update_uncollected_logs_list(self, update_func):
704 """Updates the uncollected logs list in a multi-process safe manner.
705
706 @param update_func - a function that updates the list of uncollected
707 logs. Should take one parameter, the list to be updated.
708 """
mbligha788dc42009-03-26 21:10:16 +0000709 if self.uncollected_log_file:
710 log_file = open(self.uncollected_log_file, "r+")
711 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000712 try:
713 uncollected_logs = pickle.load(log_file)
714 update_func(uncollected_logs)
715 log_file.seek(0)
716 log_file.truncate()
717 pickle.dump(uncollected_logs, log_file)
718 finally:
719 fcntl.flock(log_file, fcntl.LOCK_UN)
720 log_file.close()
721
722
723 def add_client_log(self, hostname, remote_path, local_path):
724 """Adds a new set of client logs to the list of uncollected logs,
725 to allow for future log recovery.
726
727 @param host - the hostname of the machine holding the logs
728 @param remote_path - the directory on the remote machine holding logs
729 @param local_path - the local directory to copy the logs into
730 """
731 def update_func(logs_list):
732 logs_list.append((hostname, remote_path, local_path))
733 self._update_uncollected_logs_list(update_func)
734
735
736 def remove_client_log(self, hostname, remote_path, local_path):
737 """Removes a set of client logs from the list of uncollected logs,
738 to allow for future log recovery.
739
740 @param host - the hostname of the machine holding the logs
741 @param remote_path - the directory on the remote machine holding logs
742 @param local_path - the local directory to copy the logs into
743 """
744 def update_func(logs_list):
745 logs_list.remove((hostname, remote_path, local_path))
746 self._update_uncollected_logs_list(update_func)
747
748
jadmanski10646442008-08-13 14:05:21 +0000749 def _render_record(self, status_code, subdir, operation, status='',
750 epoch_time=None, record_prefix=None,
751 optional_fields=None):
752 """
753 Internal Function to generate a record to be written into a
754 status log. For use by server_job.* classes only.
755 """
756 if subdir:
757 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000758 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000759 substr = subdir
760 else:
761 substr = '----'
762
mbligh1b3b3762008-09-25 02:46:34 +0000763 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000764 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000765 if not operation:
766 operation = '----'
767 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000768 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000769 operation = operation.rstrip()
770 status = status.rstrip()
771 status = re.sub(r"\t", " ", status)
772 # Ensure any continuation lines are marked so we can
773 # detect them in the status file to ensure it is parsable.
774 status = re.sub(r"\n", "\n" + self.record_prefix + " ", status)
775
776 if not optional_fields:
777 optional_fields = {}
778
779 # Generate timestamps for inclusion in the logs
780 if epoch_time is None:
781 epoch_time = int(time.time())
782 local_time = time.localtime(epoch_time)
783 optional_fields["timestamp"] = str(epoch_time)
784 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
785 local_time)
786
787 fields = [status_code, substr, operation]
788 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
789 fields.append(status)
790
791 if record_prefix is None:
792 record_prefix = self.record_prefix
793
794 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000795 return record_prefix + msg + '\n'
796
797
798 def _record_prerendered(self, msg):
799 """
800 Record a pre-rendered msg into the status logs. The only
801 change this makes to the message is to add on the local
802 indentation. Should not be called outside of server_job.*
803 classes. Unlike _record, this does not write the message
804 to standard output.
805 """
806 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000807 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000808 status_log = open(status_file, 'a')
809 for line in msg.splitlines():
810 line = self.record_prefix + line + '\n'
811 lines.append(line)
812 status_log.write(line)
813 status_log.close()
814 self.__parse_status(lines)
815
816
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
            namespace: The namespace dictionary to fill in.
            protect: Boolean. If True (the default) any operation that would
                clobber an existing entry in namespace will cause an error.
        Raises:
            error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name. If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # __import__ with a non-empty fromlist returns the leaf module
            # only when names are given; otherwise it returns the top-level
            # package, handled below.
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # Walk down from the top-level package to the leaf submodule.
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                # Honor __all__ when the module declares one, mirroring the
                # semantics of a "from module import *" statement.
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        # A different object already bound to this name:
                        # refuse to silently clobber it.
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job. Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
900
901 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000902 """
903 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +0000904
905 Unless protect_namespace is explicitly set to False, the dict will not
906 be modified.
907
908 Args:
909 code_file: The filename of the control file to execute.
910 namespace: A dict containing names to make available during execution.
911 protect: Boolean. If True (the default) a copy of the namespace dict
912 is used during execution to prevent the code from modifying its
913 contents outside of this function. If False the raw dict is
914 passed in and modifications will be allowed.
915 """
916 if protect:
917 namespace = namespace.copy()
918 self._fill_server_control_namespace(namespace, protect=protect)
919 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +0000920 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +0000921 machines_text = '\n'.join(self.machines) + '\n'
922 # Only rewrite the file if it does not match our machine list.
923 try:
924 machines_f = open(MACHINES_FILENAME, 'r')
925 existing_machines_text = machines_f.read()
926 machines_f.close()
927 except EnvironmentError:
928 existing_machines_text = None
929 if machines_text != existing_machines_text:
930 utils.open_write_close(MACHINES_FILENAME, machines_text)
931 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +0000932
933
934 def _record(self, status_code, subdir, operation, status='',
935 epoch_time=None, optional_fields=None):
936 """
937 Actual function for recording a single line into the status
938 logs. Should never be called directly, only by job.record as
939 this would bypass the console monitor logging.
940 """
941
mbligh2b92b862008-11-22 13:25:32 +0000942 msg = self._render_record(status_code, subdir, operation, status,
943 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +0000944
jadmanski779bd292009-03-19 17:33:33 +0000945 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000946 sys.stdout.write(msg)
mbligh210bae62009-04-01 18:33:13 +0000947 if status_file:
948 open(status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000949 if subdir:
jadmanski779bd292009-03-19 17:33:33 +0000950 sub_status_file = self.get_status_log_path(subdir)
951 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000952 self.__parse_status(msg.splitlines())
953
954
955 def __parse_status(self, new_lines):
956 if not self.using_parser:
957 return
958 new_tests = self.parser.process_lines(new_lines)
959 for test in new_tests:
960 self.__insert_test(test)
961
962
963 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +0000964 """
965 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +0000966 database. This method will not raise an exception, even if an
967 error occurs during the insert, to avoid failing a test
968 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +0000969 self.num_tests_run += 1
970 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
971 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +0000972 try:
973 self.results_db.insert_test(self.job_model, test)
974 except Exception:
975 msg = ("WARNING: An unexpected error occured while "
976 "inserting test results into the database. "
977 "Ignoring error.\n" + traceback.format_exc())
978 print >> sys.stderr, msg
979
mblighcaa62c22008-04-07 21:51:17 +0000980
# Resolve the site-specific server_job extension class if the deployment
# provides one; utils.import_site_class falls back to base_server_job when
# no autotest_lib.server.site_server_job module exists.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +0000984
class server_job(site_server_job, base_server_job):
    """The server job class used at runtime: base_server_job behavior with
    any site-specific customizations layered in via site_server_job."""
    pass
jadmanskif37df842009-02-11 00:03:26 +0000987
988
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings, tracked as per-type lists of [start, end) disabled
    intervals."""
    def __init__(self):
        # a map of warning types to a list of disabled time intervals; each
        # interval is (start, end), with end == None while still disabled
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            still_open = end is None
            if start <= timestamp and (still_open or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # only open a new interval if none is currently open for this type
        already_open = intervals and intervals[-1][1] is None
        if not already_open:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))