blob: 98e4ed199d796e2246754201592ba430c0cab93f [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
showardb18134f2009-03-20 20:52:18 +000010import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
jadmanskic09fc152008-10-15 17:56:59 +000011from autotest_lib.client.bin import fd_stack, sysinfo
mbligh09108442008-10-15 16:27:38 +000012from autotest_lib.client.common_lib import error, log, utils, packages
jadmanski043e1132008-11-19 17:10:32 +000013from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000014from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000015
16
mbligh084bc172008-10-18 14:02:45 +000017def _control_segment_path(name):
18 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000019 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000020 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000021
22
# Names of the control files staged on/for a machine when running a job.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-canned control segments shipped alongside this module, one per job
# phase (client wrapping, crash collection, install, cleanup, ...).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000035
36
mbligh062ed152009-01-13 00:57:14 +000037# by default provide a stub that generates no site data
38def _get_site_job_data_dummy(job):
39 return {}
40
41
# load up site-specific code for generating site-specific job data
# (falls back to the no-op stub above when no site module is installed)
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000046
47
48class base_server_job(object):
mbligh2b92b862008-11-22 13:25:32 +000049 """
50 The actual job against which we do everything.
jadmanski10646442008-08-13 14:05:21 +000051
52 Properties:
53 autodir
54 The top level autotest directory (/usr/local/autotest).
55 serverdir
56 <autodir>/server/
57 clientdir
58 <autodir>/client/
59 conmuxdir
60 <autodir>/conmux/
61 testdir
62 <autodir>/server/tests/
63 site_testdir
64 <autodir>/server/site_tests/
65 control
66 the control file for this job
mblighb5dac432008-11-27 00:38:44 +000067 drop_caches_between_iterations
68 drop the pagecache between each iteration
jadmanski10646442008-08-13 14:05:21 +000069 """
70
71 STATUS_VERSION = 1
72
73
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
                control:        The control file (pathname of)
                args:           args to pass to the control file
                resultdir:      where to throw the results
                label:          label for the job
                user:           Username for the job (email address)
                client:         True if a client-side control file
        """
        # derive the standard autotest directory layout from this file's
        # location (<autodir>/server/server_job.py)
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            # read the control file contents up front
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
        self.debugdir = os.path.join(resultdir, 'debug')
        if resultdir:
            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            # seed the uncollected-logs file with an empty (pickled) list;
            # see _update_uncollected_logs_list for how it is maintained
            log_file = open(self.uncollected_log_file, "w")
            pickle.dump([], log_file)
            log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
            # remove any stale status log from a previous run
            status_log = self.get_status_log_path()
            if os.path.exists(status_log):
                os.remove(status_log)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        # record_prefix holds the current indentation (tabs) for status lines
        self.record_prefix = ''
        self.warning_loggers = set()
        # warning_manager is defined elsewhere in this module
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches_between_iterations = False

        # capture stdout/stderr through fd_stack redirection stacks
        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # make sure we have a writable tmpdir, falling back to a
        # per-user directory under the system temp dir if necessary
        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                # If the problem was something other than the
                # directory already existing, this chmod should throw as well
                # exception.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        # basic keyvals describing this job, written into the resultdir
        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            # site-specific hook may contribute extra keyvals
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        # continuous parsing is only supported for single-machine jobs;
        # multi-machine jobs set up per-machine parsers in parallel_simple
        self.parse_job = parse_job
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                             run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()
179
180
jadmanskie432dd22009-01-30 15:04:51 +0000181 @staticmethod
182 def _load_control_file(path):
183 f = open(path)
184 try:
185 control_file = f.read()
186 finally:
187 f.close()
188 return re.sub('\r', '', control_file)
189
190
jadmanski550fdc22008-11-20 16:32:08 +0000191 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000192 """
193 Register some hooks into the subcommand modules that allow us
194 to properly clean up self.hosts created in forked subprocesses.
195 """
jadmanski550fdc22008-11-20 16:32:08 +0000196 def on_fork(cmd):
197 self._existing_hosts_on_fork = set(self.hosts)
198 def on_join(cmd):
199 new_hosts = self.hosts - self._existing_hosts_on_fork
200 for host in new_hosts:
201 host.close()
202 subcommand.subcommand.register_fork_hook(on_fork)
203 subcommand.subcommand.register_join_hook(on_join)
204
jadmanski10646442008-08-13 14:05:21 +0000205
    def init_parser(self, resultdir):
        """
        Start the continuous parsing of resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        # redirect parser debugging to .parse.log (unbuffered)
        parse_log = os.path.join(resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self.STATUS_VERSION)
        self.job_model = self.parser.make_job(resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self.parse_job)
        if job_idx is None:
            self.results_db.insert_job(self.parse_job, self.job_model)
        else:
            # job is already in the db (e.g. a resumed job): point our
            # model at the existing job/machine rows instead
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
230
231
232 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000233 """
234 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000235 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000236 remaining test results to the results db)
237 """
jadmanski10646442008-08-13 14:05:21 +0000238 if not self.using_parser:
239 return
240 final_tests = self.parser.end()
241 for test in final_tests:
242 self.__insert_test(test)
243 self.using_parser = False
244
245
    def verify(self):
        """Run the 'verify' control segment against self.machines.

        Raises error.AutoservError if no machines were specified; on any
        verification failure an ABORT line is recorded and the exception
        is re-raised.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self.ssh_user,
                         'ssh_port' : self.ssh_port,
                         'ssh_pass' : self.ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise
261
262
    def repair(self, host_protection):
        """Run the 'repair' control segment against self.machines, then
        re-verify them.

        @param host_protection - protection level passed through to the
                repair control file as 'protection_level'.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)
        namespace = {'machines': self.machines, 'job': self,
                     'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
                     'ssh_pass': self.ssh_pass,
                     'protection_level': host_protection}
        # no matter what happens during repair (except if it succeeded in
        # initiating hardware repair procedure), go on to try to reverify
        try:
            self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
        except error.AutoservHardwareRepairRequestedError:
            # hardware repair was initiated; do not attempt to reverify
            raise
        except Exception, exc:
            print 'Exception occured during repair'
            traceback.print_exc()

        self.verify()
283
284
285 def precheck(self):
286 """
287 perform any additional checks in derived classes.
288 """
289 pass
290
291
292 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000293 """
294 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000295 """
296 pass
297
298
299 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000300 """
301 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000302 """
303 pass
304
305
jadmanski23afbec2008-09-17 18:12:07 +0000306 def enable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000307 """
308 By default tests run test.cleanup
309 """
jadmanski23afbec2008-09-17 18:12:07 +0000310 self.run_test_cleanup = True
311
312
313 def disable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000314 """
315 By default tests do not run test.cleanup
316 """
jadmanski23afbec2008-09-17 18:12:07 +0000317 self.run_test_cleanup = False
318
319
jadmanski10646442008-08-13 14:05:21 +0000320 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000321 """
322 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000323 """
324 return False
325
326
    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.
        """
        # forking happens unless we run against exactly the single machine
        # the job was created with
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            def wrapper(machine):
                # runs in the forked child: rebind this job object to a
                # single machine, give it its own resultdir and parser
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # no parsing: just give each machine its own resultdir
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)
358
359
jadmanskie432dd22009-01-30 15:04:51 +0000360 USE_TEMP_DIR = object()
mbligh2b92b862008-11-22 13:25:32 +0000361 def run(self, cleanup=False, install_before=False, install_after=False,
jadmanskie432dd22009-01-30 15:04:51 +0000362 collect_crashdumps=True, namespace={}, control=None,
363 control_file_dir=None):
jadmanski10646442008-08-13 14:05:21 +0000364 # use a copy so changes don't affect the original dictionary
365 namespace = namespace.copy()
366 machines = self.machines
jadmanskie432dd22009-01-30 15:04:51 +0000367 if control is None:
368 control = self.control
369 if control_file_dir is None:
370 control_file_dir = self.resultdir
jadmanski10646442008-08-13 14:05:21 +0000371
372 self.aborted = False
373 namespace['machines'] = machines
374 namespace['args'] = self.args
375 namespace['job'] = self
376 namespace['ssh_user'] = self.ssh_user
377 namespace['ssh_port'] = self.ssh_port
378 namespace['ssh_pass'] = self.ssh_pass
379 test_start_time = int(time.time())
380
mbligh80e1eba2008-11-19 00:26:18 +0000381 if self.resultdir:
382 os.chdir(self.resultdir)
jadmanski779bd292009-03-19 17:33:33 +0000383 # touch status.log so that the parser knows a job is running here
384 open(self.get_status_log_path(), 'w').close()
mbligh80e1eba2008-11-19 00:26:18 +0000385 self.enable_external_logging()
jadmanskie432dd22009-01-30 15:04:51 +0000386
jadmanskicdd0c402008-09-19 21:21:31 +0000387 collect_crashinfo = True
mblighaebe3b62008-12-22 14:45:40 +0000388 temp_control_file_dir = None
jadmanski10646442008-08-13 14:05:21 +0000389 try:
390 if install_before and machines:
mbligh084bc172008-10-18 14:02:45 +0000391 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanskie432dd22009-01-30 15:04:51 +0000392
393 # determine the dir to write the control files to
394 if control_file_dir and control_file_dir is not self.USE_TEMP_DIR:
395 temp_control_file_dir = None
mblighaebe3b62008-12-22 14:45:40 +0000396 else:
jadmanskie432dd22009-01-30 15:04:51 +0000397 temp_control_file_dir = control_file_dir = tempfile.mkdtemp(
398 suffix='temp_control_file_dir')
399 server_control_file = os.path.join(control_file_dir,
400 SERVER_CONTROL_FILENAME)
401 client_control_file = os.path.join(control_file_dir,
402 CLIENT_CONTROL_FILENAME)
jadmanski10646442008-08-13 14:05:21 +0000403 if self.client:
jadmanskie432dd22009-01-30 15:04:51 +0000404 namespace['control'] = control
405 utils.open_write_close(client_control_file, control)
mblighaebe3b62008-12-22 14:45:40 +0000406 shutil.copy(CLIENT_WRAPPER_CONTROL_FILE, server_control_file)
jadmanski10646442008-08-13 14:05:21 +0000407 else:
mbligh181b7c22008-11-22 14:22:08 +0000408 namespace['utils'] = utils
jadmanskie432dd22009-01-30 15:04:51 +0000409 utils.open_write_close(server_control_file, control)
mblighaebe3b62008-12-22 14:45:40 +0000410 self._execute_code(server_control_file, namespace)
jadmanski10646442008-08-13 14:05:21 +0000411
jadmanskicdd0c402008-09-19 21:21:31 +0000412 # disable crashinfo collection if we get this far without error
413 collect_crashinfo = False
jadmanski10646442008-08-13 14:05:21 +0000414 finally:
mblighaebe3b62008-12-22 14:45:40 +0000415 if temp_control_file_dir:
jadmanskie432dd22009-01-30 15:04:51 +0000416 # Clean up temp directory used for copies of the control files
mblighaebe3b62008-12-22 14:45:40 +0000417 try:
418 shutil.rmtree(temp_control_file_dir)
419 except Exception, e:
jadmanskie432dd22009-01-30 15:04:51 +0000420 print 'Error %s removing dir %s' % (e,
421 temp_control_file_dir)
422
jadmanskicdd0c402008-09-19 21:21:31 +0000423 if machines and (collect_crashdumps or collect_crashinfo):
jadmanski10646442008-08-13 14:05:21 +0000424 namespace['test_start_time'] = test_start_time
jadmanskicdd0c402008-09-19 21:21:31 +0000425 if collect_crashinfo:
mbligh084bc172008-10-18 14:02:45 +0000426 # includes crashdumps
427 self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
jadmanskicdd0c402008-09-19 21:21:31 +0000428 else:
mbligh084bc172008-10-18 14:02:45 +0000429 self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
jadmanski6bb32d72009-03-19 20:25:24 +0000430 os.remove(self.uncollected_log_file)
jadmanski10646442008-08-13 14:05:21 +0000431 self.disable_external_logging()
showard45ae8192008-11-05 19:32:53 +0000432 if cleanup and machines:
433 self._execute_code(CLEANUP_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000434 if install_after and machines:
mbligh084bc172008-10-18 14:02:45 +0000435 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000436
437
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True if the test ran cleanly, False if it failed with a
        TestBaseException; any other exception is re-raised.
        """

        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        # refuse to clobber the results of a previous run of the same test
        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failures carry their own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                # anything else is recorded as an outright FAIL
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            # test failures are not fatal to the job; report via return value
            return False
        elif exc_info:
            # re-raise non-test exceptions, preserving the original
            # traceback (Python 2 three-argument raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000482
483
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START/END lines around the call and indents nested
        records by one tab while the group runs. Returns a tuple
        (result, exc_info) where exc_info is None unless the function
        raised a TestBaseException; any other exception is converted
        into a JobError and raised.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the indentation, even on failure
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000509
510
511 def run_group(self, function, *args, **dargs):
512 """\
513 function:
514 subroutine to run
515 *args:
516 arguments for the function
517 """
518
519 name = function.__name__
520
521 # Allow the tag for the group to be specified.
522 tag = dargs.pop('tag', None)
523 if tag:
524 name = tag
525
jadmanskide292df2008-08-26 20:51:14 +0000526 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000527
528
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
        else:
            # only probe the kernel version once the reboot succeeded
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
555
556
jadmanskie432dd22009-01-30 15:04:51 +0000557 def run_control(self, path):
558 """Execute a control file found at path (relative to the autotest
559 path). Intended for executing a control file within a control file,
560 not for running the top-level job control file."""
561 path = os.path.join(self.autodir, path)
562 control_file = self._load_control_file(path)
563 self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)
564
565
jadmanskic09fc152008-10-15 17:56:59 +0000566 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000567 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000568 on_every_test)
569
570
571 def add_sysinfo_logfile(self, file, on_every_test=False):
572 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
573
574
575 def _add_sysinfo_loggable(self, loggable, on_every_test):
576 if on_every_test:
577 self.sysinfo.test_loggables.add(loggable)
578 else:
579 self.sysinfo.boot_loggables.add(loggable)
580
581
jadmanski10646442008-08-13 14:05:21 +0000582 def record(self, status_code, subdir, operation, status='',
583 optional_fields=None):
584 """
585 Record job-level status
586
587 The intent is to make this file both machine parseable and
588 human readable. That involves a little more complexity, but
589 really isn't all that bad ;-)
590
591 Format is <status code>\t<subdir>\t<operation>\t<status>
592
mbligh1b3b3762008-09-25 02:46:34 +0000593 status code: see common_lib.log.is_valid_status()
jadmanski10646442008-08-13 14:05:21 +0000594 for valid status definition
595
596 subdir: MUST be a relevant subdirectory in the results,
597 or None, which will be represented as '----'
598
599 operation: description of what you ran (e.g. "dbench", or
600 "mkfs -t foobar /dev/sda9")
601
602 status: error message or "completed sucessfully"
603
604 ------------------------------------------------------------
605
606 Initial tabs indicate indent levels for grouping, and is
607 governed by self.record_prefix
608
609 multiline messages have secondary lines prefaced by a double
610 space (' ')
611
612 Executing this method will trigger the logging of all new
613 warnings to date from the various console loggers.
614 """
615 # poll all our warning loggers for new warnings
616 warnings = self._read_warnings()
617 for timestamp, msg in warnings:
618 self._record("WARN", None, None, msg, timestamp)
619
620 # write out the actual status log line
621 self._record(status_code, subdir, operation, status,
622 optional_fields=optional_fields)
623
624
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read (timeout 0 = non-blocking poll)
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; lines are "<timestamp>\t<type>\t<msg>"
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
662
663
jadmanskif37df842009-02-11 00:03:26 +0000664 def disable_warnings(self, warning_type, record=True):
665 self.warning_manager.disable_warnings(warning_type)
666 if record:
667 self.record("INFO", None, None,
668 "disabling %s warnings" % warning_type,
669 {"warnings.disable": warning_type})
670
671
672 def enable_warnings(self, warning_type, record=True):
673 self.warning_manager.enable_warnings(warning_type)
674 if record:
675 self.record("INFO", None, None,
676 "enabling %s warnings" % warning_type,
677 {"warnings.enable": warning_type})
678
679
jadmanski779bd292009-03-19 17:33:33 +0000680 def get_status_log_path(self, subdir=None):
681 """Return the path to the job status log.
682
683 @param subdir - Optional paramter indicating that you want the path
684 to a subdirectory status log.
685
686 @returns The path where the status log should be.
687 """
688 if subdir:
689 return os.path.join(self.resultdir, subdir, "status.log")
690 else:
691 return os.path.join(self.resultdir, "status.log")
692
693
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        # hold an exclusive flock across the whole read-modify-write so
        # concurrent autoserv processes cannot corrupt the pickled list
        log_file = open(self.uncollected_log_file, "r+")
        fcntl.flock(log_file, fcntl.LOCK_EX)
        try:
            uncollected_logs = pickle.load(log_file)
            update_func(uncollected_logs)
            # rewrite the file in place with the updated list
            log_file.seek(0)
            log_file.truncate()
            pickle.dump(uncollected_logs, log_file)
        finally:
            fcntl.flock(log_file, fcntl.LOCK_UN)
            log_file.close()
711
712
713 def add_client_log(self, hostname, remote_path, local_path):
714 """Adds a new set of client logs to the list of uncollected logs,
715 to allow for future log recovery.
716
717 @param host - the hostname of the machine holding the logs
718 @param remote_path - the directory on the remote machine holding logs
719 @param local_path - the local directory to copy the logs into
720 """
721 def update_func(logs_list):
722 logs_list.append((hostname, remote_path, local_path))
723 self._update_uncollected_logs_list(update_func)
724
725
726 def remove_client_log(self, hostname, remote_path, local_path):
727 """Removes a set of client logs from the list of uncollected logs,
728 to allow for future log recovery.
729
730 @param host - the hostname of the machine holding the logs
731 @param remote_path - the directory on the remote machine holding logs
732 @param local_path - the local directory to copy the logs into
733 """
734 def update_func(logs_list):
735 logs_list.remove((hostname, remote_path, local_path))
736 self._update_uncollected_logs_list(update_func)
737
738
jadmanski10646442008-08-13 14:05:21 +0000739 def _render_record(self, status_code, subdir, operation, status='',
740 epoch_time=None, record_prefix=None,
741 optional_fields=None):
742 """
743 Internal Function to generate a record to be written into a
744 status log. For use by server_job.* classes only.
745 """
746 if subdir:
747 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000748 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000749 substr = subdir
750 else:
751 substr = '----'
752
mbligh1b3b3762008-09-25 02:46:34 +0000753 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000754 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000755 if not operation:
756 operation = '----'
757 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000758 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000759 operation = operation.rstrip()
760 status = status.rstrip()
761 status = re.sub(r"\t", " ", status)
762 # Ensure any continuation lines are marked so we can
763 # detect them in the status file to ensure it is parsable.
764 status = re.sub(r"\n", "\n" + self.record_prefix + " ", status)
765
766 if not optional_fields:
767 optional_fields = {}
768
769 # Generate timestamps for inclusion in the logs
770 if epoch_time is None:
771 epoch_time = int(time.time())
772 local_time = time.localtime(epoch_time)
773 optional_fields["timestamp"] = str(epoch_time)
774 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
775 local_time)
776
777 fields = [status_code, substr, operation]
778 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
779 fields.append(status)
780
781 if record_prefix is None:
782 record_prefix = self.record_prefix
783
784 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000785 return record_prefix + msg + '\n'
786
787
788 def _record_prerendered(self, msg):
789 """
790 Record a pre-rendered msg into the status logs. The only
791 change this makes to the message is to add on the local
792 indentation. Should not be called outside of server_job.*
793 classes. Unlike _record, this does not write the message
794 to standard output.
795 """
796 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000797 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000798 status_log = open(status_file, 'a')
799 for line in msg.splitlines():
800 line = self.record_prefix + line + '\n'
801 lines.append(line)
802 status_log.write(line)
803 status_log.close()
804 self.__parse_status(lines)
805
806
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # With a non-empty fromlist __import__ returns the leaf module;
            # with an empty fromlist it returns the top-level package, which
            # is why the submodule walk below is needed.
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                # Honor the module's __all__ if it declares one, mirroring
                # the semantics of a real "import *".
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
889
890
891 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000892 """
893 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +0000894
895 Unless protect_namespace is explicitly set to False, the dict will not
896 be modified.
897
898 Args:
899 code_file: The filename of the control file to execute.
900 namespace: A dict containing names to make available during execution.
901 protect: Boolean. If True (the default) a copy of the namespace dict
902 is used during execution to prevent the code from modifying its
903 contents outside of this function. If False the raw dict is
904 passed in and modifications will be allowed.
905 """
906 if protect:
907 namespace = namespace.copy()
908 self._fill_server_control_namespace(namespace, protect=protect)
909 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +0000910 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +0000911 machines_text = '\n'.join(self.machines) + '\n'
912 # Only rewrite the file if it does not match our machine list.
913 try:
914 machines_f = open(MACHINES_FILENAME, 'r')
915 existing_machines_text = machines_f.read()
916 machines_f.close()
917 except EnvironmentError:
918 existing_machines_text = None
919 if machines_text != existing_machines_text:
920 utils.open_write_close(MACHINES_FILENAME, machines_text)
921 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +0000922
923
924 def _record(self, status_code, subdir, operation, status='',
925 epoch_time=None, optional_fields=None):
926 """
927 Actual function for recording a single line into the status
928 logs. Should never be called directly, only by job.record as
929 this would bypass the console monitor logging.
930 """
931
mbligh2b92b862008-11-22 13:25:32 +0000932 msg = self._render_record(status_code, subdir, operation, status,
933 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +0000934
jadmanski779bd292009-03-19 17:33:33 +0000935 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000936 sys.stdout.write(msg)
937 open(status_file, "a").write(msg)
938 if subdir:
jadmanski779bd292009-03-19 17:33:33 +0000939 sub_status_file = self.get_status_log_path(subdir)
940 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000941 self.__parse_status(msg.splitlines())
942
943
944 def __parse_status(self, new_lines):
945 if not self.using_parser:
946 return
947 new_tests = self.parser.process_lines(new_lines)
948 for test in new_tests:
949 self.__insert_test(test)
950
951
952 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +0000953 """
954 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +0000955 database. This method will not raise an exception, even if an
956 error occurs during the insert, to avoid failing a test
957 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +0000958 self.num_tests_run += 1
959 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
960 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +0000961 try:
962 self.results_db.insert_test(self.job_model, test)
963 except Exception:
964 msg = ("WARNING: An unexpected error occured while "
965 "inserting test results into the database. "
966 "Ignoring error.\n" + traceback.format_exc())
967 print >> sys.stderr, msg
968
mblighcaa62c22008-04-07 21:51:17 +0000969
# Resolve the site-specific server_job customization class if the
# installation provides one; otherwise fall back to base_server_job.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +0000973
class server_job(site_server_job, base_server_job):
    """The concrete server job class: layers any site-specific behavior
    from site_server_job on top of the common base_server_job."""
    pass
jadmanskif37df842009-02-11 00:03:26 +0000976
977
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings.

    Disabled periods are tracked per warning type as a list of
    (start, end) time intervals; an interval with end == None means the
    warning type is still disabled.
    """
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            # an open interval (end is None) covers everything after start
            if start <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = bool(intervals) and intervals[-1][1] is None
        if not currently_disabled:
            intervals.append((current_time_func(), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            start = intervals[-1][0]
            intervals[-1] = (start, current_time_func())