blob: e066ced4a796d20cdd68b3c471b21c18bfc84e50 [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
showardb18134f2009-03-20 20:52:18 +000010import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
jadmanskic09fc152008-10-15 17:56:59 +000011from autotest_lib.client.bin import fd_stack, sysinfo
mbligh09108442008-10-15 16:27:38 +000012from autotest_lib.client.common_lib import error, log, utils, packages
jadmanski043e1132008-11-19 17:10:32 +000013from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000014from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000015
16
mbligh084bc172008-10-18 14:02:45 +000017def _control_segment_path(name):
18 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000019 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000020 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000021
22
# Filenames used when staging control files into a job's control file dir.
CLIENT_CONTROL_FILENAME = 'control'       # client-side control file
SERVER_CONTROL_FILENAME = 'control.srv'   # server-side control file
MACHINES_FILENAME = '.machines'           # list of machines used by the job

# Canned control segments executed at fixed points of a job's lifecycle.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000035
36
mbligh062ed152009-01-13 00:57:14 +000037# by default provide a stub that generates no site data
38def _get_site_job_data_dummy(job):
39 return {}
40
41
# load up site-specific code for generating site-specific job data;
# falls back to the dummy above when no site implementation is present
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000046
47
class base_server_job(object):
    """
    The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
            serverdir
                    <autodir>/server/
            clientdir
                    <autodir>/client/
            conmuxdir
                    <autodir>/conmux/
            testdir
                    <autodir>/server/tests/
            site_testdir
                    <autodir>/server/site_tests/
            control
                    the control file for this job
            drop_caches_between_iterations
                    drop the pagecache between each iteration
    """

    # version of the status log format this job writes
    STATUS_VERSION = 1
    # seconds slept when toggling warnings (see disable_warnings/enable_warnings)
    WARNING_DISABLE_DELAY = 5
jadmanski10646442008-08-13 14:05:21 +000073
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
                control:    The control file (pathname of)
                args:       args to pass to the control file
                resultdir:  where to throw the results
                label:      label for the job
                user:       Username for the job (email address)
                machines:   hosts the job will run against
                client:     True if a client-side control file
                parse_job:  job tag to use for continuous result parsing
                ssh_user/ssh_port/ssh_pass: ssh connection parameters
        """
        # derive the standard autotest directory layout relative to this file
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            # control is a pathname; read its contents now
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        self.uncollected_log_file = None
        if resultdir:
            self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
            self.debugdir = os.path.join(resultdir, 'debug')

            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            # seed the uncollected-logs file with an empty pickled list so
            # _update_uncollected_logs_list can open it "r+" later
            if not os.path.exists(self.uncollected_log_file):
                log_file = open(self.uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        self.record_prefix = ''            # indent prefix for status log lines
        self.warning_loggers = set()       # pipes polled by _read_warnings
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()                 # hosts tracked for fork/join cleanup
        self.drop_caches_between_iterations = False

        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # make sure we have a writable tmpdir, falling back to a
        # per-user directory under the system temp dir if necessary
        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                # If the problem was something other than the
                # directory already existing, this chmod should throw as well
                # exception.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            # only write these keyvals out on the first job in a resultdir
            if 'job_started' not in utils.read_keyval(self.resultdir):
                job_data.update(get_site_job_data(self))
                utils.write_keyval(self.resultdir, job_data)

        # continuous parsing only works for single-machine jobs
        self.parse_job = parse_job
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                              run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()
181
182
jadmanskie432dd22009-01-30 15:04:51 +0000183 @staticmethod
184 def _load_control_file(path):
185 f = open(path)
186 try:
187 control_file = f.read()
188 finally:
189 f.close()
190 return re.sub('\r', '', control_file)
191
192
jadmanski550fdc22008-11-20 16:32:08 +0000193 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000194 """
195 Register some hooks into the subcommand modules that allow us
196 to properly clean up self.hosts created in forked subprocesses.
197 """
jadmanski550fdc22008-11-20 16:32:08 +0000198 def on_fork(cmd):
199 self._existing_hosts_on_fork = set(self.hosts)
200 def on_join(cmd):
201 new_hosts = self.hosts - self._existing_hosts_on_fork
202 for host in new_hosts:
203 host.close()
204 subcommand.subcommand.register_fork_hook(on_fork)
205 subcommand.subcommand.register_join_hook(on_join)
206
jadmanski10646442008-08-13 14:05:21 +0000207
    def init_parser(self, resultdir):
        """
        Start the continuous parsing of resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        # redirect parser debugging to .parse.log (unbuffered so output
        # survives a crash)
        parse_log = os.path.join(resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self.STATUS_VERSION)
        self.job_model = self.parser.make_job(resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self.parse_job)
        if job_idx is None:
            self.results_db.insert_job(self.parse_job, self.job_model)
        else:
            # job exists; attach the model to the existing db rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
232
233
234 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000235 """
236 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000237 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000238 remaining test results to the results db)
239 """
jadmanski10646442008-08-13 14:05:21 +0000240 if not self.using_parser:
241 return
242 final_tests = self.parser.end()
243 for test in final_tests:
244 self.__insert_test(test)
245 self.using_parser = False
246
247
248 def verify(self):
249 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000250 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000251 if self.resultdir:
252 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000253 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000254 namespace = {'machines' : self.machines, 'job' : self,
255 'ssh_user' : self.ssh_user,
256 'ssh_port' : self.ssh_port,
257 'ssh_pass' : self.ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000258 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000259 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000260 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000261 self.record('ABORT', None, None, msg)
262 raise
263
264
265 def repair(self, host_protection):
266 if not self.machines:
267 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000268 if self.resultdir:
269 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000270 namespace = {'machines': self.machines, 'job': self,
271 'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
272 'ssh_pass': self.ssh_pass,
273 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000274
mbligh0931b0a2009-04-08 17:44:48 +0000275 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000276
277
278 def precheck(self):
279 """
280 perform any additional checks in derived classes.
281 """
282 pass
283
284
285 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000286 """
287 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000288 """
289 pass
290
291
292 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000293 """
294 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000295 """
296 pass
297
298
jadmanski23afbec2008-09-17 18:12:07 +0000299 def enable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000300 """
301 By default tests run test.cleanup
302 """
jadmanski23afbec2008-09-17 18:12:07 +0000303 self.run_test_cleanup = True
304
305
306 def disable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000307 """
308 By default tests do not run test.cleanup
309 """
jadmanski23afbec2008-09-17 18:12:07 +0000310 self.run_test_cleanup = False
311
312
jadmanski10646442008-08-13 14:05:21 +0000313 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000314 """
315 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000316 """
317 return False
318
319
    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.
        """
        # forking happens unless this is a single-machine job running on
        # exactly its own machine list
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            def wrapper(machine):
                # NOTE(review): these self mutations appear intended to run
                # in a forked subprocess (see _register_subcommand_hooks),
                # so each child gets its own per-machine state — confirm
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # multi-machine but unparsed: just give each machine its own
                # resultdir with a hostname/status_version keyval
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)
351
352
    # sentinel passed as control_file_dir to request a throwaway temp dir
    USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Run the job's control file.

        Stages the control file(s) into control_file_dir, executes them,
        and afterwards (in a finally block) collects crash information,
        removes the uncollected-logs file, and runs the optional cleanup
        and install segments.
        """
        # use a copy so changes don't affect the original dictionary
        # (this also makes the mutable {} default safe: it is never mutated)
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            control = self.control
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self.ssh_user
        namespace['ssh_port'] = self.ssh_port
        namespace['ssh_pass'] = self.ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume failure until the control file completes successfully
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            if install_before and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)

            if not only_collect_crashinfo:
                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self.USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   SERVER_CONTROL_FILENAME)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self.client:
                    # client-side control: write it out and run it via the
                    # canned client wrapper segment
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
                                server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                self._execute_code(server_control_file, namespace)

            # no error occurred, so we don't need to collect crashinfo
            collect_crashinfo = False
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    print 'Error %s removing dir %s' % (e,
                                                        temp_control_file_dir)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self.uncollected_log_file:
                os.remove(self.uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000434
435
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True on success; False if the test raised a
        TestBaseException (which has already been recorded). Any other
        exception is re-raised.
        """
        # group is unused here; only the test name is needed
        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # the exception carries its own status code
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise with the original traceback preserved
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000480
481
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Writes START/END records around the call, indenting nested
        records via record_prefix. Returns (result, exc_info) where
        exc_info is non-None only for TestBaseException failures; any
        other exception is converted into a raised JobError.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the indent before writing the END record
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000507
508
509 def run_group(self, function, *args, **dargs):
510 """\
511 function:
512 subroutine to run
513 *args:
514 arguments for the function
515 """
516
517 name = function.__name__
518
519 # Allow the tag for the group to be specified.
520 tag = dargs.pop('tag', None)
521 if tag:
522 name = tag
523
jadmanskide292df2008-08-26 20:51:14 +0000524 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000525
526
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """
        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # restore the indent before writing the END record
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
            raise
        else:
            # only query the kernel after a successful reboot
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
554
555
jadmanskie432dd22009-01-30 15:04:51 +0000556 def run_control(self, path):
557 """Execute a control file found at path (relative to the autotest
558 path). Intended for executing a control file within a control file,
559 not for running the top-level job control file."""
560 path = os.path.join(self.autodir, path)
561 control_file = self._load_control_file(path)
562 self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)
563
564
jadmanskic09fc152008-10-15 17:56:59 +0000565 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000566 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000567 on_every_test)
568
569
570 def add_sysinfo_logfile(self, file, on_every_test=False):
571 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
572
573
574 def _add_sysinfo_loggable(self, loggable, on_every_test):
575 if on_every_test:
576 self.sysinfo.test_loggables.add(loggable)
577 else:
578 self.sysinfo.boot_loggables.add(loggable)
579
580
    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping, and is
        governed by self.record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self.record_prefix
        try:
            # warnings logged against an END line belong inside the group,
            # so indent them one extra level
            if status_code.startswith("END "):
                self.record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self.record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)
628
629
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; lines are tab-separated as
                # <timestamp>\t<msgtype>\t<message>
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
667
668
    def disable_warnings(self, warning_type):
        """Disable warnings of warning_type and log an INFO line noting it.

        Sleeps for WARNING_DISABLE_DELAY seconds afterwards —
        presumably to let the disable take effect relative to in-flight
        console output before the caller proceeds; TODO confirm.
        """
        self.warning_manager.disable_warnings(warning_type)
        self.record("INFO", None, None,
                    "disabling %s warnings" % warning_type,
                    {"warnings.disable": warning_type})
        time.sleep(self.WARNING_DISABLE_DELAY)
jadmanskif37df842009-02-11 00:03:26 +0000675
676
    def enable_warnings(self, warning_type):
        """Re-enable warnings of warning_type and log an INFO line noting it.

        Sleeps for WARNING_DISABLE_DELAY seconds first (mirror image of
        disable_warnings, which sleeps after) — presumably so trailing
        console output from the disabled window is not re-enabled;
        TODO confirm.
        """
        time.sleep(self.WARNING_DISABLE_DELAY)
        self.warning_manager.enable_warnings(warning_type)
        self.record("INFO", None, None,
                    "enabling %s warnings" % warning_type,
                    {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000683
684
jadmanski779bd292009-03-19 17:33:33 +0000685 def get_status_log_path(self, subdir=None):
686 """Return the path to the job status log.
687
688 @param subdir - Optional paramter indicating that you want the path
689 to a subdirectory status log.
690
691 @returns The path where the status log should be.
692 """
mbligh210bae62009-04-01 18:33:13 +0000693 if self.resultdir:
694 if subdir:
695 return os.path.join(self.resultdir, subdir, "status.log")
696 else:
697 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000698 else:
mbligh210bae62009-04-01 18:33:13 +0000699 return None
jadmanski779bd292009-03-19 17:33:33 +0000700
701
    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
        logs. Should take one parameter, the list to be updated.
        """
        # no-op when the job has no resultdir (no log file was created)
        if self.uncollected_log_file:
            log_file = open(self.uncollected_log_file, "r+")
            # exclusive lock serializes concurrent updates across processes
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                # rewrite the file in place with the updated list
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
                # flush before unlocking so other processes see the update
                log_file.flush()
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()
721
722
723 def add_client_log(self, hostname, remote_path, local_path):
724 """Adds a new set of client logs to the list of uncollected logs,
725 to allow for future log recovery.
726
727 @param host - the hostname of the machine holding the logs
728 @param remote_path - the directory on the remote machine holding logs
729 @param local_path - the local directory to copy the logs into
730 """
731 def update_func(logs_list):
732 logs_list.append((hostname, remote_path, local_path))
733 self._update_uncollected_logs_list(update_func)
734
735
736 def remove_client_log(self, hostname, remote_path, local_path):
737 """Removes a set of client logs from the list of uncollected logs,
738 to allow for future log recovery.
739
740 @param host - the hostname of the machine holding the logs
741 @param remote_path - the directory on the remote machine holding logs
742 @param local_path - the local directory to copy the logs into
743 """
744 def update_func(logs_list):
745 logs_list.remove((hostname, remote_path, local_path))
746 self._update_uncollected_logs_list(update_func)
747
748
jadmanski10646442008-08-13 14:05:21 +0000749 def _render_record(self, status_code, subdir, operation, status='',
750 epoch_time=None, record_prefix=None,
751 optional_fields=None):
752 """
753 Internal Function to generate a record to be written into a
754 status log. For use by server_job.* classes only.
755 """
756 if subdir:
757 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000758 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000759 substr = subdir
760 else:
761 substr = '----'
762
mbligh1b3b3762008-09-25 02:46:34 +0000763 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000764 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000765 if not operation:
766 operation = '----'
767 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000768 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000769 operation = operation.rstrip()
770 status = status.rstrip()
771 status = re.sub(r"\t", " ", status)
772 # Ensure any continuation lines are marked so we can
773 # detect them in the status file to ensure it is parsable.
774 status = re.sub(r"\n", "\n" + self.record_prefix + " ", status)
775
776 if not optional_fields:
777 optional_fields = {}
778
779 # Generate timestamps for inclusion in the logs
780 if epoch_time is None:
781 epoch_time = int(time.time())
782 local_time = time.localtime(epoch_time)
783 optional_fields["timestamp"] = str(epoch_time)
784 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
785 local_time)
786
787 fields = [status_code, substr, operation]
788 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
789 fields.append(status)
790
791 if record_prefix is None:
792 record_prefix = self.record_prefix
793
794 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000795 return record_prefix + msg + '\n'
796
797
798 def _record_prerendered(self, msg):
799 """
800 Record a pre-rendered msg into the status logs. The only
801 change this makes to the message is to add on the local
802 indentation. Should not be called outside of server_job.*
803 classes. Unlike _record, this does not write the message
804 to standard output.
805 """
806 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000807 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000808 status_log = open(status_file, 'a')
809 for line in msg.splitlines():
810 line = self.record_prefix + line + '\n'
811 lines.append(line)
812 status_log.write(line)
813 status_log.close()
814 self.__parse_status(lines)
815
816
mbligh084bc172008-10-18 14:02:45 +0000817 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000818 """
819 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000820
821 This sets up the control file API by importing modules and making them
822 available under the appropriate names within namespace.
823
824 For use by _execute_code().
825
826 Args:
827 namespace: The namespace dictionary to fill in.
828 protect: Boolean. If True (the default) any operation that would
829 clobber an existing entry in namespace will cause an error.
830 Raises:
831 error.AutoservError: When a name would be clobbered by import.
832 """
833 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000834 """
835 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +0000836
837 Args:
838 module_name: The string module name.
839 names: A limiting list of names to import from module_name. If
840 empty (the default), all names are imported from the module
841 similar to a "from foo.bar import *" statement.
842 Raises:
843 error.AutoservError: When a name being imported would clobber
844 a name already in namespace.
845 """
846 module = __import__(module_name, {}, {}, names)
847
848 # No names supplied? Import * from the lowest level module.
849 # (Ugh, why do I have to implement this part myself?)
850 if not names:
851 for submodule_name in module_name.split('.')[1:]:
852 module = getattr(module, submodule_name)
853 if hasattr(module, '__all__'):
854 names = getattr(module, '__all__')
855 else:
856 names = dir(module)
857
858 # Install each name into namespace, checking to make sure it
859 # doesn't override anything that already exists.
860 for name in names:
861 # Check for conflicts to help prevent future problems.
862 if name in namespace and protect:
863 if namespace[name] is not getattr(module, name):
864 raise error.AutoservError('importing name '
865 '%s from %s %r would override %r' %
866 (name, module_name, getattr(module, name),
867 namespace[name]))
868 else:
869 # Encourage cleanliness and the use of __all__ for a
870 # more concrete API with less surprises on '*' imports.
871 warnings.warn('%s (%r) being imported from %s for use '
872 'in server control files is not the '
873 'first occurrance of that import.' %
874 (name, namespace[name], module_name))
875
876 namespace[name] = getattr(module, name)
877
878
879 # This is the equivalent of prepending a bunch of import statements to
880 # the front of the control script.
881 namespace.update(os=os, sys=sys)
882 _import_names('autotest_lib.server',
883 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
884 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
885 _import_names('autotest_lib.server.subcommand',
886 ('parallel', 'parallel_simple', 'subcommand'))
887 _import_names('autotest_lib.server.utils',
888 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
889 _import_names('autotest_lib.client.common_lib.error')
890 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
891
892 # Inject ourself as the job object into other classes within the API.
893 # (Yuck, this injection is a gross thing be part of a public API. -gps)
894 #
895 # XXX Base & SiteAutotest do not appear to use .job. Who does?
896 namespace['autotest'].Autotest.job = self
897 # server.hosts.base_classes.Host uses .job.
898 namespace['hosts'].Host.job = self
899
900
901 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000902 """
903 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +0000904
905 Unless protect_namespace is explicitly set to False, the dict will not
906 be modified.
907
908 Args:
909 code_file: The filename of the control file to execute.
910 namespace: A dict containing names to make available during execution.
911 protect: Boolean. If True (the default) a copy of the namespace dict
912 is used during execution to prevent the code from modifying its
913 contents outside of this function. If False the raw dict is
914 passed in and modifications will be allowed.
915 """
916 if protect:
917 namespace = namespace.copy()
918 self._fill_server_control_namespace(namespace, protect=protect)
919 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +0000920 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +0000921 machines_text = '\n'.join(self.machines) + '\n'
922 # Only rewrite the file if it does not match our machine list.
923 try:
924 machines_f = open(MACHINES_FILENAME, 'r')
925 existing_machines_text = machines_f.read()
926 machines_f.close()
927 except EnvironmentError:
928 existing_machines_text = None
929 if machines_text != existing_machines_text:
930 utils.open_write_close(MACHINES_FILENAME, machines_text)
931 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +0000932
933
934 def _record(self, status_code, subdir, operation, status='',
935 epoch_time=None, optional_fields=None):
936 """
937 Actual function for recording a single line into the status
938 logs. Should never be called directly, only by job.record as
939 this would bypass the console monitor logging.
940 """
941
mbligh2b92b862008-11-22 13:25:32 +0000942 msg = self._render_record(status_code, subdir, operation, status,
943 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +0000944
jadmanski779bd292009-03-19 17:33:33 +0000945 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000946 sys.stdout.write(msg)
mbligh210bae62009-04-01 18:33:13 +0000947 if status_file:
948 open(status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000949 if subdir:
jadmanski779bd292009-03-19 17:33:33 +0000950 sub_status_file = self.get_status_log_path(subdir)
951 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000952 self.__parse_status(msg.splitlines())
953
954
955 def __parse_status(self, new_lines):
956 if not self.using_parser:
957 return
958 new_tests = self.parser.process_lines(new_lines)
959 for test in new_tests:
960 self.__insert_test(test)
961
962
963 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +0000964 """
965 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +0000966 database. This method will not raise an exception, even if an
967 error occurs during the insert, to avoid failing a test
968 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +0000969 self.num_tests_run += 1
970 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
971 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +0000972 try:
973 self.results_db.insert_test(self.job_model, test)
974 except Exception:
975 msg = ("WARNING: An unexpected error occured while "
976 "inserting test results into the database. "
977 "Ignoring error.\n" + traceback.format_exc())
978 print >> sys.stderr, msg
979
mblighcaa62c22008-04-07 21:51:17 +0000980
# Resolve the site-specific server_job class, presumably falling back to
# base_server_job when no site module exists — confirm against
# utils.import_site_class.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +0000984
class server_job(site_server_job, base_server_job):
    """Concrete server job class: layers site-specific customizations
    (site_server_job) over the stock base_server_job behaviour."""
    pass
jadmanskif37df842009-02-11 00:03:26 +0000987
988
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # Maps warning type -> list of (start, end) intervals during which
        # that warning type was disabled; end of None means "still disabled".
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for begin, finish in self.disabled_warnings.get(warning_type, []):
            if begin <= timestamp and (finish is None or timestamp < finish):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        spans = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval if there is no interval still open.
        if not spans or spans[-1][1] is not None:
            spans.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        spans = self.disabled_warnings.get(warning_type, [])
        # Close the open interval, if any, at the current time.
        if spans and spans[-1][1] is None:
            spans[-1] = (spans[-1][0], int(current_time_func()))