"""
The main job wrapper for the server side.

This is the core infrastructure. Derived from the client side job.py

Copyright Martin J. Bligh, Andy Whitcroft 2007
"""

import getpass, os, sys, re, stat, tempfile, time, select, subprocess
import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
from autotest_lib.client.bin import fd_stack, sysinfo
from autotest_lib.client.common_lib import error, log, utils, packages
from autotest_lib.server import test, subcommand, profilers
from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils


def _control_segment_path(name):
    """Get the pathname of the named control segment file."""
    server_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(server_dir, "control_segments", name)
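# Illustrative note (not in the original source): with autotest installed at
# /usr/local/autotest, a call such as _control_segment_path('verify'), as used
# for the constants below, would resolve to
# /usr/local/autotest/server/control_segments/verify.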


CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')


# by default provide a stub that generates no site data
def _get_site_job_data_dummy(job):
    return {}


# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
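# Hedged sketch (not part of this file) of what a site hook could look like:
# a site_server_job module may define its own get_site_job_data() returning
# extra keyvals to merge into the job keyval file, for example
#     def get_site_job_data(job):
#         return {'site_build': 'R12-3456.0.0'}   # hypothetical key/value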


class base_server_job(object):
    """
    The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
            serverdir
                    <autodir>/server/
            clientdir
                    <autodir>/client/
            conmuxdir
                    <autodir>/conmux/
            testdir
                    <autodir>/server/tests/
            site_testdir
                    <autodir>/server/site_tests/
            control
                    the control file for this job
            drop_caches_between_iterations
                    drop the pagecache between each iteration
    """

    STATUS_VERSION = 1


    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
                control:        The control file (pathname of)
                args:           args to pass to the control file
                resultdir:      where to throw the results
                label:          label for the job
                user:           Username for the job (email address)
                client:         True if a client-side control file
        """
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
        self.debugdir = os.path.join(resultdir, 'debug')
        if resultdir:
            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            if not os.path.exists(self.uncollected_log_file):
                log_file = open(self.uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
            status_log = self.get_status_log_path()
            if os.path.exists(status_log):
                os.remove(status_log)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        self.record_prefix = ''
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches_between_iterations = False

        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may. If the
                # problem was something other than the directory already
                # existing, this chmod should raise an exception as well.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self.parse_job = parse_job
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                        run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()


    @staticmethod
    def _load_control_file(path):
        f = open(path)
        try:
            control_file = f.read()
        finally:
            f.close()
        return re.sub('\r', '', control_file)


    def _register_subcommand_hooks(self):
        """
        Register some hooks into the subcommand modules that allow us
        to properly clean up self.hosts created in forked subprocesses.
        """
        def on_fork(cmd):
            self._existing_hosts_on_fork = set(self.hosts)
        def on_join(cmd):
            new_hosts = self.hosts - self._existing_hosts_on_fork
            for host in new_hosts:
                host.close()
        subcommand.subcommand.register_fork_hook(on_fork)
        subcommand.subcommand.register_join_hook(on_join)
205
jadmanski10646442008-08-13 14:05:21 +0000206
207 def init_parser(self, resultdir):
mbligh2b92b862008-11-22 13:25:32 +0000208 """
209 Start the continuous parsing of resultdir. This sets up
jadmanski10646442008-08-13 14:05:21 +0000210 the database connection and inserts the basic job object into
mbligh2b92b862008-11-22 13:25:32 +0000211 the database if necessary.
212 """
jadmanski10646442008-08-13 14:05:21 +0000213 # redirect parser debugging to .parse.log
214 parse_log = os.path.join(resultdir, '.parse.log')
215 parse_log = open(parse_log, 'w', 0)
216 tko_utils.redirect_parser_debugging(parse_log)
217 # create a job model object and set up the db
218 self.results_db = tko_db.db(autocommit=True)
219 self.parser = status_lib.parser(self.STATUS_VERSION)
220 self.job_model = self.parser.make_job(resultdir)
221 self.parser.start(self.job_model)
222 # check if a job already exists in the db and insert it if
223 # it does not
224 job_idx = self.results_db.find_job(self.parse_job)
225 if job_idx is None:
mbligh2b92b862008-11-22 13:25:32 +0000226 self.results_db.insert_job(self.parse_job, self.job_model)
jadmanski10646442008-08-13 14:05:21 +0000227 else:
mbligh2b92b862008-11-22 13:25:32 +0000228 machine_idx = self.results_db.lookup_machine(self.job_model.machine)
jadmanski10646442008-08-13 14:05:21 +0000229 self.job_model.index = job_idx
230 self.job_model.machine_idx = machine_idx
231
232
233 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000234 """
235 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000236 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000237 remaining test results to the results db)
238 """
jadmanski10646442008-08-13 14:05:21 +0000239 if not self.using_parser:
240 return
241 final_tests = self.parser.end()
242 for test in final_tests:
243 self.__insert_test(test)
244 self.using_parser = False
245

    def verify(self):
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self.ssh_user,
                         'ssh_port' : self.ssh_port,
                         'ssh_pass' : self.ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise


    def repair(self, host_protection):
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)
        namespace = {'machines': self.machines, 'job': self,
                     'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
                     'ssh_pass': self.ssh_pass,
                     'protection_level': host_protection}
        # no matter what happens during repair (except if it succeeded in
        # initiating the hardware repair procedure), go on to try to reverify
        try:
            self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
        except error.AutoservHardwareRepairRequestedError:
            raise
        except Exception, exc:
            print 'Exception occurred during repair'
            traceback.print_exc()

        self.verify()


    def precheck(self):
        """
        perform any additional checks in derived classes.
        """
        pass


    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.
        """
        pass


    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.
        """
        pass


    def enable_test_cleanup(self):
        """
        From now on, tests will run test.cleanup by default.
        """
        self.run_test_cleanup = True


    def disable_test_cleanup(self):
        """
        From now on, tests will not run test.cleanup by default.
        """
        self.run_test_cleanup = False


    def use_external_logging(self):
        """
        Return True if external logging should be used.
        """
        return False


    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.
        """
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            def wrapper(machine):
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)
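    # Typical usage from a multi-machine server control file (an illustrative
    # sketch, not from this file; the host/control names are assumptions):
    #     def run_client(machine):
    #         host = hosts.create_host(machine)
    #         at = autotest.Autotest(host)
    #         at.run(control, host=host)
    #     job.parallel_simple(run_client, machines)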


    USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            control = self.control
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self.ssh_user
        namespace['ssh_port'] = self.ssh_port
        namespace['ssh_pass'] = self.ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'w').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            if install_before and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)

            if not only_collect_crashinfo:
                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self.USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   SERVER_CONTROL_FILENAME)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self.client:
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
                                server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                self._execute_code(server_control_file, namespace)

            # no error occurred, so we don't need to collect crashinfo
            collect_crashinfo = False
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    print 'Error %s removing dir %s' % (e,
                                                        temp_control_file_dir)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            os.remove(self.uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)


    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run
        """

        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
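    # In a server-side control file (where 'job' is available in the
    # namespace), a test is typically launched with something like
    #     job.run_test('sleeptest', tag='smoke')
    # which writes its results under <resultdir>/sleeptest.smoke. Illustrative
    # usage only; the test name and tag here are made up.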


    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info


    def run_group(self, function, *args, **dargs):
        """\
        function:
                subroutine to run
        *args:
                arguments for the function
        """

        name = function.__name__

        # Allow the tag for the group to be specified.
        tag = dargs.pop('tag', None)
        if tag:
            name = tag

        return self._run_group(name, None, function, *args, **dargs)[0]


    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
                         representing the kernel version.
        """

        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
        else:
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
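    # Illustrative usage, assuming a host object with suitably named methods
    # (these names are assumptions, not part of this file):
    #     job.run_reboot(host.reboot, host.get_kernel_ver)
    # This records START/END reboot entries plus a kernel= field on success.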


    def run_control(self, path):
        """Execute a control file found at path (relative to the autotest
        path). Intended for executing a control file within a control file,
        not for running the top-level job control file."""
        path = os.path.join(self.autodir, path)
        control_file = self._load_control_file(path)
        self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)


    def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
        self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
                                   on_every_test)


    def add_sysinfo_logfile(self, file, on_every_test=False):
        self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)


    def _add_sysinfo_loggable(self, loggable, on_every_test):
        if on_every_test:
            self.sysinfo.test_loggables.add(loggable)
        else:
            self.sysinfo.boot_loggables.add(loggable)
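    # Illustrative usage (the command and file below are just examples):
    # collect "df -h" output once per boot and /proc/meminfo around every test:
    #     job.add_sysinfo_command('df -h', logfile='df.txt')
    #     job.add_sysinfo_logfile('/proc/meminfo', on_every_test=True)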


    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status.

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
                or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                   "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping and are
        governed by self.record_prefix.

        Multiline messages have secondary lines prefaced by a double
        space ('  ').

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        for timestamp, msg in warnings:
            self._record("WARN", None, None, msg, timestamp)

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)


    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
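    # Each console logger is expected to emit lines of the form
    # "<epoch timestamp>\t<warning type>\t<message>", which is what the
    # line.split('\t', 2) call above relies on. For example (made-up values),
    # "1237500000\tNETWORK\teth0 link lost" would be recorded as a WARN entry
    # unless NETWORK warnings were disabled at that timestamp.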


    def disable_warnings(self, warning_type, record=True):
        self.warning_manager.disable_warnings(warning_type)
        if record:
            self.record("INFO", None, None,
                        "disabling %s warnings" % warning_type,
                        {"warnings.disable": warning_type})


    def enable_warnings(self, warning_type, record=True):
        self.warning_manager.enable_warnings(warning_type)
        if record:
            self.record("INFO", None, None,
                        "enabling %s warnings" % warning_type,
                        {"warnings.enable": warning_type})


    def get_status_log_path(self, subdir=None):
        """Return the path to the job status log.

        @param subdir - Optional parameter indicating that you want the path
            to a subdirectory status log.

        @returns The path where the status log should be.
        """
        if subdir:
            return os.path.join(self.resultdir, subdir, "status.log")
        else:
            return os.path.join(self.resultdir, "status.log")


    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        log_file = open(self.uncollected_log_file, "r+")
        fcntl.flock(log_file, fcntl.LOCK_EX)
        try:
            uncollected_logs = pickle.load(log_file)
            update_func(uncollected_logs)
            log_file.seek(0)
            log_file.truncate()
            pickle.dump(uncollected_logs, log_file)
        finally:
            fcntl.flock(log_file, fcntl.LOCK_UN)
            log_file.close()


    def add_client_log(self, hostname, remote_path, local_path):
        """Adds a new set of client logs to the list of uncollected logs,
        to allow for future log recovery.

        @param hostname - the hostname of the machine holding the logs
        @param remote_path - the directory on the remote machine holding logs
        @param local_path - the local directory to copy the logs into
        """
        def update_func(logs_list):
            logs_list.append((hostname, remote_path, local_path))
        self._update_uncollected_logs_list(update_func)


    def remove_client_log(self, hostname, remote_path, local_path):
        """Removes a set of client logs from the list of uncollected logs,
        to allow for future log recovery.

        @param hostname - the hostname of the machine holding the logs
        @param remote_path - the directory on the remote machine holding logs
        @param local_path - the local directory to copy the logs into
        """
        def update_func(logs_list):
            logs_list.remove((hostname, remote_path, local_path))
        self._update_uncollected_logs_list(update_func)


    def _render_record(self, status_code, subdir, operation, status='',
                       epoch_time=None, record_prefix=None,
                       optional_fields=None):
        """
        Internal Function to generate a record to be written into a
        status log. For use by server_job.* classes only.
        """
        if subdir:
            if re.match(r'[\n\t]', subdir):
                raise ValueError('Invalid character in subdir string')
            substr = subdir
        else:
            substr = '----'

        if not log.is_valid_status(status_code):
            raise ValueError('Invalid status code supplied: %s' % status_code)
        if not operation:
            operation = '----'
        if re.match(r'[\n\t]', operation):
            raise ValueError('Invalid character in operation string')
        operation = operation.rstrip()
        status = status.rstrip()
        status = re.sub(r"\t", " ", status)
        # Ensure any continuation lines are marked so we can
        # detect them in the status file to ensure it is parsable.
        status = re.sub(r"\n", "\n" + self.record_prefix + "  ", status)

        if not optional_fields:
            optional_fields = {}

        # Generate timestamps for inclusion in the logs
        if epoch_time is None:
            epoch_time = int(time.time())
        local_time = time.localtime(epoch_time)
        optional_fields["timestamp"] = str(epoch_time)
        optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
                                                     local_time)

        fields = [status_code, substr, operation]
        fields += ["%s=%s" % x for x in optional_fields.iteritems()]
        fields.append(status)

        if record_prefix is None:
            record_prefix = self.record_prefix

        msg = '\t'.join(str(x) for x in fields)
        return record_prefix + msg + '\n'
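    # Example of a rendered line (values illustrative), as produced for
    # record('GOOD', 'sleeptest', 'sleeptest', 'completed successfully'):
    #     GOOD<TAB>sleeptest<TAB>sleeptest<TAB>timestamp=1237500000<TAB>localtime=Mar 19 14:00:00<TAB>completed successfully
    # Fields are tab separated; leading tabs (record_prefix) mark group depth.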


    def _record_prerendered(self, msg):
        """
        Record a pre-rendered msg into the status logs. The only
        change this makes to the message is to add on the local
        indentation. Should not be called outside of server_job.*
        classes. Unlike _record, this does not write the message
        to standard output.
        """
        lines = []
        status_file = self.get_status_log_path()
        status_log = open(status_file, 'a')
        for line in msg.splitlines():
            line = self.record_prefix + line + '\n'
            lines.append(line)
            status_log.write(line)
        status_log.close()
        self.__parse_status(lines)


    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
              module_name: The string module name.
              names: A limiting list of names to import from module_name.  If
                  empty (the default), all names are imported from the module
                  similar to a "from foo.bar import *" statement.
            Raises:
              error.AutoservError: When a name being imported would clobber
                  a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrence of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing to be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self


    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        execfile(code_file, namespace, namespace)


    def _record(self, status_code, subdir, operation, status='',
                epoch_time=None, optional_fields=None):
        """
        Actual function for recording a single line into the status
        logs. Should never be called directly, only by job.record as
        this would bypass the console monitor logging.
        """

        msg = self._render_record(status_code, subdir, operation, status,
                                  epoch_time, optional_fields=optional_fields)

        status_file = self.get_status_log_path()
        sys.stdout.write(msg)
        open(status_file, "a").write(msg)
        if subdir:
            sub_status_file = self.get_status_log_path(subdir)
            open(sub_status_file, "a").write(msg)
        self.__parse_status(msg.splitlines())


    def __parse_status(self, new_lines):
        if not self.using_parser:
            return
        new_tests = self.parser.process_lines(new_lines)
        for test in new_tests:
            self.__insert_test(test)


    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues."""
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
969 msg = ("WARNING: An unexpected error occured while "
970 "inserting test results into the database. "
971 "Ignoring error.\n" + traceback.format_exc())
972 print >> sys.stderr, msg
973
mblighcaa62c22008-04-07 21:51:17 +0000974
mbligha7007722009-01-13 00:37:11 +0000975site_server_job = utils.import_site_class(
976 __file__, "autotest_lib.server.site_server_job", "site_server_job",
977 base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +0000978
jadmanski10646442008-08-13 14:05:21 +0000979class server_job(site_server_job, base_server_job):
jadmanski0afbb632008-06-06 21:10:57 +0000980 pass
jadmanskif37df842009-02-11 00:03:26 +0000981
982
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
992 """Indicates if a warning (based on the time it occured and its type)
993 is a valid warning. A warning is considered "invalid" if this type of
994 warning was marked as "disabled" at the time the warning occured."""
        disabled_intervals = self.disabled_warnings.get(warning_type, [])
        for start, end in disabled_intervals:
            if timestamp >= start and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        if not intervals or intervals[-1][1] is not None:
            intervals.append((current_time_func(), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], current_time_func())
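    # Illustrative sketch (not part of the original file) of how the interval
    # bookkeeping above behaves, using a fixed clock for clarity:
    #     mgr = warning_manager()
    #     mgr.disable_warnings('NETWORK', current_time_func=lambda: 100)
    #     mgr.enable_warnings('NETWORK', current_time_func=lambda: 200)
    #     mgr.is_valid(150, 'NETWORK')   # False: inside the disabled interval
    #     mgr.is_valid(250, 'NETWORK')   # True: after warnings were re-enabled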