blob: 7fdd9d08c8accd439d9dd9709d367a021f3f0198 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
showardb18134f2009-03-20 20:52:18 +000010import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
jadmanskic09fc152008-10-15 17:56:59 +000011from autotest_lib.client.bin import fd_stack, sysinfo
mbligh09108442008-10-15 16:27:38 +000012from autotest_lib.client.common_lib import error, log, utils, packages
jadmanski043e1132008-11-19 17:10:32 +000013from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000014from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000015
16
mbligh084bc172008-10-18 14:02:45 +000017def _control_segment_path(name):
18 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000019 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000020 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000021
22
# Filenames used when materializing control files into a job's
# control file directory (see base_server_job.run).
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved paths to the stock control segments shipped with the
# server, used by base_server_job.run/verify/repair.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000035
36
mbligh062ed152009-01-13 00:57:14 +000037# by default provide a stub that generates no site data
38def _get_site_job_data_dummy(job):
39 return {}
40
41
# load up site-specific code for generating site-specific job data;
# falls back to the dummy (empty dict) when no site module is installed
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000046
47
class base_server_job(object):
    """
    The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
            serverdir
                    <autodir>/server/
            clientdir
                    <autodir>/client/
            conmuxdir
                    <autodir>/conmux/
            testdir
                    <autodir>/server/tests/
            site_testdir
                    <autodir>/server/site_tests/
            control
                    the control file for this job
            drop_caches_between_iterations
                    drop the pagecache between each iteration
    """

    # version of the status log format this job emits (written to keyvals
    # and passed to the status_lib parser)
    STATUS_VERSION = 1
    # seconds slept by disable_warnings/enable_warnings around toggling a
    # warning category, so in-flight warnings land on the intended side
    WARNING_DISABLE_DELAY = 5
jadmanski10646442008-08-13 14:05:21 +000073
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
                control:        The control file (pathname of)
                args:           args to pass to the control file
                resultdir:      where to throw the results
                label:          label for the job
                user:           Username for the job (email address)
                machines:       list of machine hostnames to run on
                client:         True if a client-side control file
                parse_job:      job tag; enables continuous result parsing
                                when set and exactly one machine is used
                ssh_user:       ssh username for reaching the machines
                ssh_port:       ssh port for reaching the machines
                ssh_pass:       ssh password for reaching the machines
        """
        # derive the autotest directory layout from this module's location
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        self.uncollected_log_file = None
        if resultdir:
            # pickle file tracking client logs not yet copied back; see
            # _update_uncollected_logs_list/add_client_log
            self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
            self.debugdir = os.path.join(resultdir, 'debug')

            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            if not os.path.exists(self.uncollected_log_file):
                # seed the uncollected-logs file with an empty list
                log_file = open(self.uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
            # remove any stale status log from a previous run
            status_log = self.get_status_log_path()
            if os.path.exists(status_log):
                os.remove(status_log)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        # indentation prefix prepended to status log lines for grouping
        self.record_prefix = ''
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches_between_iterations = False

        # fd_stack wrappers around stdout/stderr (fds 1 and 2)
        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # make sure we have a writable tmpdir, falling back to a
        # per-user directory under the system temp dir if needed
        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                # If the problem was something other than the directory
                # already existing, this chmod should throw an exception
                # as well.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            # only write these keyvals out on the first job in a resultdir
            if 'job_started' not in utils.read_keyval(self.resultdir):
                job_data.update(get_site_job_data(self))
                utils.write_keyval(self.resultdir, job_data)

        # continuous parsing is only supported for single-machine jobs
        self.parse_job = parse_job
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                             run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()
184
185
jadmanskie432dd22009-01-30 15:04:51 +0000186 @staticmethod
187 def _load_control_file(path):
188 f = open(path)
189 try:
190 control_file = f.read()
191 finally:
192 f.close()
193 return re.sub('\r', '', control_file)
194
195
jadmanski550fdc22008-11-20 16:32:08 +0000196 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000197 """
198 Register some hooks into the subcommand modules that allow us
199 to properly clean up self.hosts created in forked subprocesses.
200 """
jadmanski550fdc22008-11-20 16:32:08 +0000201 def on_fork(cmd):
202 self._existing_hosts_on_fork = set(self.hosts)
203 def on_join(cmd):
204 new_hosts = self.hosts - self._existing_hosts_on_fork
205 for host in new_hosts:
206 host.close()
207 subcommand.subcommand.register_fork_hook(on_fork)
208 subcommand.subcommand.register_join_hook(on_join)
209
jadmanski10646442008-08-13 14:05:21 +0000210
    def init_parser(self, resultdir):
        """
        Start the continuous parsing of resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        # redirect parser debugging to .parse.log (buffering=0: unbuffered,
        # so debug output is not lost if the process dies)
        parse_log = os.path.join(resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self.STATUS_VERSION)
        self.job_model = self.parser.make_job(resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self.parse_job)
        if job_idx is None:
            self.results_db.insert_job(self.parse_job, self.job_model)
        else:
            # reuse the existing job/machine rows on the model
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
235
236
237 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000238 """
239 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000240 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000241 remaining test results to the results db)
242 """
jadmanski10646442008-08-13 14:05:21 +0000243 if not self.using_parser:
244 return
245 final_tests = self.parser.end()
246 for test in final_tests:
247 self.__insert_test(test)
248 self.using_parser = False
249
250
251 def verify(self):
252 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000253 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000254 if self.resultdir:
255 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000256 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000257 namespace = {'machines' : self.machines, 'job' : self,
258 'ssh_user' : self.ssh_user,
259 'ssh_port' : self.ssh_port,
260 'ssh_pass' : self.ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000261 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000262 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000263 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000264 self.record('ABORT', None, None, msg)
265 raise
266
267
268 def repair(self, host_protection):
269 if not self.machines:
270 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000271 if self.resultdir:
272 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000273 namespace = {'machines': self.machines, 'job': self,
274 'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
275 'ssh_pass': self.ssh_pass,
276 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000277
mbligh0931b0a2009-04-08 17:44:48 +0000278 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000279
280
    def precheck(self):
        """
        Perform any additional checks in derived classes.

        Default implementation does nothing; subclasses may override.
        """
        pass
286
287
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.

        Default implementation does nothing; subclasses may override.
        """
        pass
293
294
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.

        Default implementation does nothing; subclasses may override.
        """
        pass
300
301
    def enable_test_cleanup(self):
        """
        By default tests run test.cleanup
        """
        # flag consulted by the test-running machinery
        self.run_test_cleanup = True
307
308
    def disable_test_cleanup(self):
        """
        By default tests do not run test.cleanup
        """
        # flag consulted by the test-running machinery
        self.run_test_cleanup = False
314
315
    def use_external_logging(self):
        """
        Return True if external logging should be used.

        Default implementation always returns False; subclasses may
        override.
        """
        return False
321
322
    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.
        """
        # we only fork when running against more than the single machine
        # this job was already set up for
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            def wrapper(machine):
                # runs in the forked child: repoint this job at the one
                # machine and start a per-machine parser in its subdir
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # no parsing: just give each machine its own result subdir
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)
354
355
    # sentinel value for control_file_dir requesting a throwaway temp dir
    USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """
        Run this job: write out the control file(s), execute them, and
        perform crashdump/crashinfo collection and optional install/cleanup
        steps around the execution.

        cleanup: run the cleanup control segment afterwards
        install_before/install_after: run the install control segment
                before/after the job
        collect_crashdumps: collect crashdumps even on success
        namespace: extra names made available to the control file
        control: control file contents; defaults to self.control
        control_file_dir: where to write control files; defaults to the
                resultdir, or pass USE_TEMP_DIR for a throwaway directory
        only_collect_crashinfo: skip running the control file entirely
        """
        # use a copy so changes don't affect the original dictionary
        # (this also makes the mutable {} default argument safe)
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            control = self.control
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self.ssh_user
        namespace['ssh_port'] = self.ssh_port
        namespace['ssh_pass'] = self.ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume failure until the control file completes cleanly
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            if install_before and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)

            if not only_collect_crashinfo:
                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self.USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   SERVER_CONTROL_FILENAME)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self.client:
                    # client-side control: write it out and run it via the
                    # client wrapper control segment on the server side
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
                                server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                self._execute_code(server_control_file, namespace)

            # no error occurred, so we don't need to collect crashinfo
            collect_crashinfo = False
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    print 'Error %s removing dir %s' % (e,
                                                        temp_control_file_dir)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self.uncollected_log_file:
                os.remove(self.uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000437
438
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True on success, False if the test failed with a
        TestBaseException; any other exception is re-raised.
        Raises error.TestError if the test's output directory already
        exists (i.e. it appears to have been run with the same tag).
        """

        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            # runs inside the status-log group; records the test's result
            # and re-raises so _run_group can capture the exception info
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with the original traceback
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000483
484
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Records START/END entries around the call and indents the record
        prefix while the group runs.  Returns (result, exc_info) where
        exc_info is sys.exc_info() if a TestBaseException was raised and
        None otherwise.  Any other exception is converted into an
        'END ABORT' record and re-raised as error.JobError.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the prefix before writing the END record
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            # test-level failure: record it, but let the caller decide
            # what to do with the captured exception info
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000510
511
512 def run_group(self, function, *args, **dargs):
513 """\
514 function:
515 subroutine to run
516 *args:
517 arguments for the function
518 """
519
520 name = function.__name__
521
522 # Allow the tag for the group to be specified.
523 tag = dargs.pop('tag', None)
524 if tag:
525 name = tag
526
jadmanskide292df2008-08-26 20:51:14 +0000527 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000528
529
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # restore the prefix before recording the failure, then
            # propagate the original exception
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # capture the post-reboot kernel version in the END record
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
557
558
jadmanskie432dd22009-01-30 15:04:51 +0000559 def run_control(self, path):
560 """Execute a control file found at path (relative to the autotest
561 path). Intended for executing a control file within a control file,
562 not for running the top-level job control file."""
563 path = os.path.join(self.autodir, path)
564 control_file = self._load_control_file(path)
565 self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)
566
567
jadmanskic09fc152008-10-15 17:56:59 +0000568 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000569 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000570 on_every_test)
571
572
573 def add_sysinfo_logfile(self, file, on_every_test=False):
574 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
575
576
577 def _add_sysinfo_loggable(self, loggable, on_every_test):
578 if on_every_test:
579 self.sysinfo.test_loggables.add(loggable)
580 else:
581 self.sysinfo.boot_loggables.add(loggable)
582
583
    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping, and is
        governed by self.record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self.record_prefix
        try:
            # warnings that arrive with an END line belong inside the
            # group being ended, so indent them one extra level
            if status_code.startswith("END "):
                self.record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self.record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)
631
632
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; lines are
                # <timestamp>\t<msgtype>\t<message>
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
670
671
    def disable_warnings(self, warning_type):
        """Disable a category of console warnings and record the change
        in the status log."""
        self.warning_manager.disable_warnings(warning_type)
        self.record("INFO", None, None,
                    "disabling %s warnings" % warning_type,
                    {"warnings.disable": warning_type})
        # sleep after disabling so warnings already in flight are
        # discarded before the caller proceeds
        time.sleep(self.WARNING_DISABLE_DELAY)
jadmanskif37df842009-02-11 00:03:26 +0000678
679
    def enable_warnings(self, warning_type):
        """Re-enable a category of console warnings and record the change
        in the status log."""
        # sleep before re-enabling so stale warnings from the disabled
        # period are not attributed to the enabled window
        time.sleep(self.WARNING_DISABLE_DELAY)
        self.warning_manager.enable_warnings(warning_type)
        self.record("INFO", None, None,
                    "enabling %s warnings" % warning_type,
                    {"warnings.enable": warning_type})
687
jadmanski779bd292009-03-19 17:33:33 +0000688 def get_status_log_path(self, subdir=None):
689 """Return the path to the job status log.
690
691 @param subdir - Optional paramter indicating that you want the path
692 to a subdirectory status log.
693
694 @returns The path where the status log should be.
695 """
mbligh210bae62009-04-01 18:33:13 +0000696 if self.resultdir:
697 if subdir:
698 return os.path.join(self.resultdir, subdir, "status.log")
699 else:
700 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000701 else:
mbligh210bae62009-04-01 18:33:13 +0000702 return None
jadmanski779bd292009-03-19 17:33:33 +0000703
704
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.

        No-op when the job has no uncollected-logs file (no resultdir).
        """
        if self.uncollected_log_file:
            log_file = open(self.uncollected_log_file, "r+")
            # exclusive lock so concurrent processes serialize their updates
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # rewrite the file in place with the updated list
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
723
724
725 def add_client_log(self, hostname, remote_path, local_path):
726 """Adds a new set of client logs to the list of uncollected logs,
727 to allow for future log recovery.
728
729 @param host - the hostname of the machine holding the logs
730 @param remote_path - the directory on the remote machine holding logs
731 @param local_path - the local directory to copy the logs into
732 """
733 def update_func(logs_list):
734 logs_list.append((hostname, remote_path, local_path))
735 self._update_uncollected_logs_list(update_func)
736
737
738 def remove_client_log(self, hostname, remote_path, local_path):
739 """Removes a set of client logs from the list of uncollected logs,
740 to allow for future log recovery.
741
742 @param host - the hostname of the machine holding the logs
743 @param remote_path - the directory on the remote machine holding logs
744 @param local_path - the local directory to copy the logs into
745 """
746 def update_func(logs_list):
747 logs_list.remove((hostname, remote_path, local_path))
748 self._update_uncollected_logs_list(update_func)
749
750
jadmanski10646442008-08-13 14:05:21 +0000751 def _render_record(self, status_code, subdir, operation, status='',
752 epoch_time=None, record_prefix=None,
753 optional_fields=None):
754 """
755 Internal Function to generate a record to be written into a
756 status log. For use by server_job.* classes only.
757 """
758 if subdir:
759 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000760 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000761 substr = subdir
762 else:
763 substr = '----'
764
mbligh1b3b3762008-09-25 02:46:34 +0000765 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000766 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000767 if not operation:
768 operation = '----'
769 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000770 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000771 operation = operation.rstrip()
772 status = status.rstrip()
773 status = re.sub(r"\t", " ", status)
774 # Ensure any continuation lines are marked so we can
775 # detect them in the status file to ensure it is parsable.
776 status = re.sub(r"\n", "\n" + self.record_prefix + " ", status)
777
778 if not optional_fields:
779 optional_fields = {}
780
781 # Generate timestamps for inclusion in the logs
782 if epoch_time is None:
783 epoch_time = int(time.time())
784 local_time = time.localtime(epoch_time)
785 optional_fields["timestamp"] = str(epoch_time)
786 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
787 local_time)
788
789 fields = [status_code, substr, operation]
790 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
791 fields.append(status)
792
793 if record_prefix is None:
794 record_prefix = self.record_prefix
795
796 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000797 return record_prefix + msg + '\n'
798
799
800 def _record_prerendered(self, msg):
801 """
802 Record a pre-rendered msg into the status logs. The only
803 change this makes to the message is to add on the local
804 indentation. Should not be called outside of server_job.*
805 classes. Unlike _record, this does not write the message
806 to standard output.
807 """
808 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000809 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000810 status_log = open(status_file, 'a')
811 for line in msg.splitlines():
812 line = self.record_prefix + line + '\n'
813 lines.append(line)
814 status_log.write(line)
815 status_log.close()
816 self.__parse_status(lines)
817
818
mbligh084bc172008-10-18 14:02:45 +0000819 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000820 """
821 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000822
823 This sets up the control file API by importing modules and making them
824 available under the appropriate names within namespace.
825
826 For use by _execute_code().
827
828 Args:
829 namespace: The namespace dictionary to fill in.
830 protect: Boolean. If True (the default) any operation that would
831 clobber an existing entry in namespace will cause an error.
832 Raises:
833 error.AutoservError: When a name would be clobbered by import.
834 """
835 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000836 """
837 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000838
839 Args:
840 module_name: The string module name.
841 names: A limiting list of names to import from module_name. If
842 empty (the default), all names are imported from the module
843 similar to a "from foo.bar import *" statement.
844 Raises:
845 error.AutoservError: When a name being imported would clobber
846 a name already in namespace.
847 """
848 module = __import__(module_name, {}, {}, names)
849
850 # No names supplied? Import * from the lowest level module.
851 # (Ugh, why do I have to implement this part myself?)
852 if not names:
853 for submodule_name in module_name.split('.')[1:]:
854 module = getattr(module, submodule_name)
855 if hasattr(module, '__all__'):
856 names = getattr(module, '__all__')
857 else:
858 names = dir(module)
859
860 # Install each name into namespace, checking to make sure it
861 # doesn't override anything that already exists.
862 for name in names:
863 # Check for conflicts to help prevent future problems.
864 if name in namespace and protect:
865 if namespace[name] is not getattr(module, name):
866 raise error.AutoservError('importing name '
867 '%s from %s %r would override %r' %
868 (name, module_name, getattr(module, name),
869 namespace[name]))
870 else:
871 # Encourage cleanliness and the use of __all__ for a
872 # more concrete API with less surprises on '*' imports.
873 warnings.warn('%s (%r) being imported from %s for use '
874 'in server control files is not the '
875 'first occurrance of that import.' %
876 (name, namespace[name], module_name))
877
878 namespace[name] = getattr(module, name)
879
880
881 # This is the equivalent of prepending a bunch of import statements to
882 # the front of the control script.
883 namespace.update(os=os, sys=sys)
884 _import_names('autotest_lib.server',
885 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
886 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
887 _import_names('autotest_lib.server.subcommand',
888 ('parallel', 'parallel_simple', 'subcommand'))
889 _import_names('autotest_lib.server.utils',
890 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
891 _import_names('autotest_lib.client.common_lib.error')
892 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
893
894 # Inject ourself as the job object into other classes within the API.
895 # (Yuck, this injection is a gross thing be part of a public API. -gps)
896 #
897 # XXX Base & SiteAutotest do not appear to use .job. Who does?
898 namespace['autotest'].Autotest.job = self
899 # server.hosts.base_classes.Host uses .job.
900 namespace['hosts'].Host.job = self
901
902
903 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000904 """
905 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +0000906
907 Unless protect_namespace is explicitly set to False, the dict will not
908 be modified.
909
910 Args:
911 code_file: The filename of the control file to execute.
912 namespace: A dict containing names to make available during execution.
913 protect: Boolean. If True (the default) a copy of the namespace dict
914 is used during execution to prevent the code from modifying its
915 contents outside of this function. If False the raw dict is
916 passed in and modifications will be allowed.
917 """
918 if protect:
919 namespace = namespace.copy()
920 self._fill_server_control_namespace(namespace, protect=protect)
921 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +0000922 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +0000923 machines_text = '\n'.join(self.machines) + '\n'
924 # Only rewrite the file if it does not match our machine list.
925 try:
926 machines_f = open(MACHINES_FILENAME, 'r')
927 existing_machines_text = machines_f.read()
928 machines_f.close()
929 except EnvironmentError:
930 existing_machines_text = None
931 if machines_text != existing_machines_text:
932 utils.open_write_close(MACHINES_FILENAME, machines_text)
933 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +0000934
935
936 def _record(self, status_code, subdir, operation, status='',
937 epoch_time=None, optional_fields=None):
938 """
939 Actual function for recording a single line into the status
940 logs. Should never be called directly, only by job.record as
941 this would bypass the console monitor logging.
942 """
943
mbligh2b92b862008-11-22 13:25:32 +0000944 msg = self._render_record(status_code, subdir, operation, status,
945 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +0000946
jadmanski779bd292009-03-19 17:33:33 +0000947 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000948 sys.stdout.write(msg)
mbligh210bae62009-04-01 18:33:13 +0000949 if status_file:
950 open(status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000951 if subdir:
jadmanski779bd292009-03-19 17:33:33 +0000952 sub_status_file = self.get_status_log_path(subdir)
953 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000954 self.__parse_status(msg.splitlines())
955
956
957 def __parse_status(self, new_lines):
958 if not self.using_parser:
959 return
960 new_tests = self.parser.process_lines(new_lines)
961 for test in new_tests:
962 self.__insert_test(test)
963
964
965 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +0000966 """
967 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +0000968 database. This method will not raise an exception, even if an
969 error occurs during the insert, to avoid failing a test
970 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +0000971 self.num_tests_run += 1
972 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
973 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +0000974 try:
975 self.results_db.insert_test(self.job_model, test)
976 except Exception:
977 msg = ("WARNING: An unexpected error occured while "
978 "inserting test results into the database. "
979 "Ignoring error.\n" + traceback.format_exc())
980 print >> sys.stderr, msg
981
mblighcaa62c22008-04-07 21:51:17 +0000982
mbligha7007722009-01-13 00:37:11 +0000983site_server_job = utils.import_site_class(
984 __file__, "autotest_lib.server.site_server_job", "site_server_job",
985 base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +0000986
jadmanski10646442008-08-13 14:05:21 +0000987class server_job(site_server_job, base_server_job):
jadmanski0afbb632008-06-06 21:10:57 +0000988 pass
jadmanskif37df842009-02-11 00:03:26 +0000989
990
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # maps each warning type to a list of (start, end) intervals during
        # which that type is disabled; end is None while still disabled
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Decide whether a warning should be kept.

        A warning is "invalid" when a disabled interval for its type covers
        the moment it occurred; otherwise it is valid."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            interval_started = timestamp >= start
            interval_open = end is None or timestamp < end
            if interval_started and interval_open:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """Start suppressing warnings of this type, effective now."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        already_disabled = intervals and intervals[-1][1] is None
        if not already_disabled:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """Stop suppressing warnings of this type, effective now."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))