"""
The main job wrapper for the server side.

This is the core infrastructure. Derived from the client side job.py

Copyright Martin J. Bligh, Andy Whitcroft 2007
"""

import getpass, os, sys, re, stat, tempfile, time, select, subprocess
import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
from autotest_lib.client.bin import fd_stack, sysinfo
from autotest_lib.client.common_lib import error, log, utils, packages
from autotest_lib.server import test, subcommand, profilers
from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils


def _control_segment_path(name):
    """Get the pathname of the named control segment file."""
    server_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(server_dir, "control_segments", name)


CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')


# by default provide a stub that generates no site data
def _get_site_job_data_dummy(job):
    return {}


# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)
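# Illustrative sketch only, not part of this module: a site with local
# customizations would provide autotest_lib/server/site_server_job.py
# defining a function with the same signature as the stub above, e.g.
#
#     def get_site_job_data(job):
#         # hypothetical example keyvals; real sites choose their own
#         return {'site_build': 'nightly-1234', 'site_pool': 'regression'}
#
# import_site_function() falls back to _get_site_job_data_dummy when no
# such module is present.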


class base_server_job(object):
    """
    The actual job against which we do everything.

    Properties:
        autodir
            The top level autotest directory (/usr/local/autotest).
        serverdir
            <autodir>/server/
        clientdir
            <autodir>/client/
        conmuxdir
            <autodir>/conmux/
        testdir
            <autodir>/server/tests/
        site_testdir
            <autodir>/server/site_tests/
        control
            the control file for this job
        drop_caches_between_iterations
            drop the pagecache between each iteration
    """

    STATUS_VERSION = 1
    WARNING_DISABLE_DELAY = 5

    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
            control: the pathname of the control file
            args: args to pass to the control file
            resultdir: where to throw the results
            label: label for the job
            user: username for the job (email address)
            client: True if this is a client-side control file
        """
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        self.uncollected_log_file = None
        if resultdir:
            self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
            self.debugdir = os.path.join(resultdir, 'debug')

            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            if not os.path.exists(self.uncollected_log_file):
                log_file = open(self.uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
            status_log = self.get_status_log_path()
            if os.path.exists(status_log):
                os.remove(status_log)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        self.record_prefix = ''
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches_between_iterations = False

        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                # If the problem was something other than the directory
                # already existing, this chmod should throw an exception
                # as well.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self.parse_job = parse_job
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                              run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()


    @staticmethod
    def _load_control_file(path):
        f = open(path)
        try:
            control_file = f.read()
        finally:
            f.close()
        return re.sub('\r', '', control_file)


    def _register_subcommand_hooks(self):
        """
        Register some hooks into the subcommand modules that allow us
        to properly clean up self.hosts created in forked subprocesses.
        """
        def on_fork(cmd):
            self._existing_hosts_on_fork = set(self.hosts)
        def on_join(cmd):
            new_hosts = self.hosts - self._existing_hosts_on_fork
            for host in new_hosts:
                host.close()
        subcommand.subcommand.register_fork_hook(on_fork)
        subcommand.subcommand.register_join_hook(on_join)


    def init_parser(self, resultdir):
        """
        Start the continuous parsing of resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self.STATUS_VERSION)
        self.job_model = self.parser.make_job(resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self.parse_job)
        if job_idx is None:
            self.results_db.insert_job(self.parse_job, self.job_model)
        else:
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx


    def cleanup_parser(self):
        """
        This should be called after the server job is finished
        to carry out any remaining cleanup (e.g. flushing any
        remaining test results to the results db)
        """
        if not self.using_parser:
            return
        final_tests = self.parser.end()
        for test in final_tests:
            self.__insert_test(test)
        self.using_parser = False


    def verify(self):
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self.ssh_user,
                         'ssh_port' : self.ssh_port,
                         'ssh_pass' : self.ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise

    def repair(self, host_protection):
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)
        namespace = {'machines': self.machines, 'job': self,
                     'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
                     'ssh_pass': self.ssh_pass,
                     'protection_level': host_protection}
        # no matter what happens during repair (except if it succeeded in
        # initiating a hardware repair procedure), go on to try to reverify
        try:
            self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
        except error.AutoservHardwareRepairRequestedError:
            raise
        except Exception, exc:
            print 'Exception occurred during repair'
            traceback.print_exc()

        self.verify()


    def precheck(self):
        """
        Perform any additional checks in derived classes.
        """
        pass


    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.
        """
        pass


    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.
        """
        pass


    def enable_test_cleanup(self):
        """
        After this is called, tests will run test.cleanup by default.
        """
        self.run_test_cleanup = True


    def disable_test_cleanup(self):
        """
        After this is called, tests will not run test.cleanup by default.
        """
        self.run_test_cleanup = False


    def use_external_logging(self):
        """
        Return True if external logging should be used.
        """
        return False


    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.
        """
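        # Illustrative only (run_client stands in for a hypothetical
        # per-machine function defined in a control file):
        #   job.parallel_simple(run_client, machines)
        # forks one subcommand per machine; with continuous parsing enabled,
        # each fork gets its own results subdirectory and parser.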
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            def wrapper(machine):
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)


    USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            control = self.control
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self.ssh_user
        namespace['ssh_port'] = self.ssh_port
        namespace['ssh_pass'] = self.ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'w').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            if install_before and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)

            if not only_collect_crashinfo:
                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self.USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   SERVER_CONTROL_FILENAME)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self.client:
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
                                server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                self._execute_code(server_control_file, namespace)

                # no error occurred, so we don't need to collect crashinfo
                collect_crashinfo = False
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    print 'Error %s removing dir %s' % (e,
                                                        temp_control_file_dir)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self.uncollected_log_file:
                os.remove(self.uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)


    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run
        """

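        # Illustrative call from a server control file (hypothetical test
        # name): job.run_test('sleeptest', tag='smoke') records results under
        # <resultdir>/sleeptest.smoke and returns True on success.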
        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True


    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info


    def run_group(self, function, *args, **dargs):
        """\
        function:
                subroutine to run
        *args:
                arguments for the function
        """

        name = function.__name__

        # Allow the tag for the group to be specified.
        tag = dargs.pop('tag', None)
        if tag:
            name = tag

        return self._run_group(name, None, function, *args, **dargs)[0]


    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

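        # Illustrative only (the host methods named here are hypothetical):
        #   job.run_reboot(host.reboot, host.get_kernel_ver)
        # brackets the reboot with START/END reboot records and attaches the
        # new kernel version to the END GOOD line.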
        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
        else:
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})


    def run_control(self, path):
        """Execute a control file found at path (relative to the autotest
        path). Intended for executing a control file within a control file,
        not for running the top-level job control file."""
        path = os.path.join(self.autodir, path)
        control_file = self._load_control_file(path)
        self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)


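    # Illustrative usage from a control file (the command and file below are
    # hypothetical examples, not defaults):
    #   job.add_sysinfo_command("lspci -v", logfile="lspci.txt",
    #                           on_every_test=True)
    #   job.add_sysinfo_logfile("/var/log/messages")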
    def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
        self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
                                   on_every_test)


    def add_sysinfo_logfile(self, file, on_every_test=False):
        self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)


    def _add_sysinfo_loggable(self, loggable, on_every_test):
        if on_every_test:
            self.sysinfo.test_loggables.add(loggable)
        else:
            self.sysinfo.boot_loggables.add(loggable)


    def record(self, status_code, subdir, operation, status='',
               optional_fields=None):
        """
        Record job-level status

        The intent is to make this file both machine parseable and
        human readable. That involves a little more complexity, but
        really isn't all that bad ;-)

        Format is <status code>\t<subdir>\t<operation>\t<status>

        status code: see common_lib.log.is_valid_status()
                     for valid status definition

        subdir: MUST be a relevant subdirectory in the results,
        or None, which will be represented as '----'

        operation: description of what you ran (e.g. "dbench", or
                                        "mkfs -t foobar /dev/sda9")

        status: error message or "completed successfully"

        ------------------------------------------------------------

        Initial tabs indicate indent levels for grouping and are
        governed by self.record_prefix

        multiline messages have secondary lines prefaced by a double
        space ('  ')

        Executing this method will trigger the logging of all new
        warnings to date from the various console loggers.
        """
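        # A rendered line (illustrative values only) looks roughly like:
        #   GOOD\tdbench\tdbench\ttimestamp=1238459200\t
        #       localtime=Mar 31 01:26:40\tcompleted successfully
        # where \t is a literal tab and any leading tabs come from
        # self.record_prefix for nesting.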
        # poll all our warning loggers for new warnings
        warnings = self._read_warnings()
        old_record_prefix = self.record_prefix
        try:
            if status_code.startswith("END "):
                self.record_prefix += "\t"
            for timestamp, msg in warnings:
                self._record("WARN", None, None, msg, timestamp)
        finally:
            self.record_prefix = old_record_prefix

        # write out the actual status log line
        self._record(status_code, subdir, operation, status,
                     optional_fields=optional_fields)


    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
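        # Each logger line is expected to be tab-separated, e.g. (illustrative
        # values only):
        #   "1238459200\tNETWORK\tnetwork dropped packets on eth0\n"
        # i.e. epoch timestamp, warning type, then the message itself.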
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings


    def disable_warnings(self, warning_type):
        self.warning_manager.disable_warnings(warning_type)
        self.record("INFO", None, None,
                    "disabling %s warnings" % warning_type,
                    {"warnings.disable": warning_type})
        time.sleep(self.WARNING_DISABLE_DELAY)


    def enable_warnings(self, warning_type):
        time.sleep(self.WARNING_DISABLE_DELAY)
        self.warning_manager.enable_warnings(warning_type)
        self.record("INFO", None, None,
                    "enabling %s warnings" % warning_type,
                    {"warnings.enable": warning_type})


    def get_status_log_path(self, subdir=None):
        """Return the path to the job status log.

        @param subdir - Optional parameter indicating that you want the path
            to a subdirectory status log.

        @returns The path where the status log should be.
        """
        if self.resultdir:
            if subdir:
                return os.path.join(self.resultdir, subdir, "status.log")
            else:
                return os.path.join(self.resultdir, "status.log")
        else:
            return None


    def _update_uncollected_logs_list(self, update_func):
        """Updates the uncollected logs list in a multi-process safe manner.

        @param update_func - a function that updates the list of uncollected
            logs. Should take one parameter, the list to be updated.
        """
        if self.uncollected_log_file:
            log_file = open(self.uncollected_log_file, "r+")
            fcntl.flock(log_file, fcntl.LOCK_EX)
            try:
                uncollected_logs = pickle.load(log_file)
                update_func(uncollected_logs)
                log_file.seek(0)
                log_file.truncate()
                pickle.dump(uncollected_logs, log_file)
            finally:
                fcntl.flock(log_file, fcntl.LOCK_UN)
                log_file.close()


    def add_client_log(self, hostname, remote_path, local_path):
        """Adds a new set of client logs to the list of uncollected logs,
        to allow for future log recovery.

        @param hostname - the hostname of the machine holding the logs
        @param remote_path - the directory on the remote machine holding logs
        @param local_path - the local directory to copy the logs into
        """
        def update_func(logs_list):
            logs_list.append((hostname, remote_path, local_path))
        self._update_uncollected_logs_list(update_func)


    def remove_client_log(self, hostname, remote_path, local_path):
        """Removes a set of client logs from the list of uncollected logs,
        to allow for future log recovery.

        @param hostname - the hostname of the machine holding the logs
        @param remote_path - the directory on the remote machine holding logs
        @param local_path - the local directory to copy the logs into
        """
        def update_func(logs_list):
            logs_list.remove((hostname, remote_path, local_path))
        self._update_uncollected_logs_list(update_func)


    def _render_record(self, status_code, subdir, operation, status='',
                       epoch_time=None, record_prefix=None,
                       optional_fields=None):
        """
        Internal function to generate a record to be written into a
        status log. For use by server_job.* classes only.
        """
        if subdir:
            if re.match(r'[\n\t]', subdir):
                raise ValueError('Invalid character in subdir string')
            substr = subdir
        else:
            substr = '----'

        if not log.is_valid_status(status_code):
            raise ValueError('Invalid status code supplied: %s' % status_code)
        if not operation:
            operation = '----'
        if re.match(r'[\n\t]', operation):
            raise ValueError('Invalid character in operation string')
        operation = operation.rstrip()
        status = status.rstrip()
        status = re.sub(r"\t", "  ", status)
        # Ensure any continuation lines are marked so we can
        # detect them in the status file to ensure it is parsable.
        status = re.sub(r"\n", "\n" + self.record_prefix + "  ", status)

        if not optional_fields:
            optional_fields = {}

        # Generate timestamps for inclusion in the logs
        if epoch_time is None:
            epoch_time = int(time.time())
        local_time = time.localtime(epoch_time)
        optional_fields["timestamp"] = str(epoch_time)
        optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
                                                     local_time)

        fields = [status_code, substr, operation]
        fields += ["%s=%s" % x for x in optional_fields.iteritems()]
        fields.append(status)

        if record_prefix is None:
            record_prefix = self.record_prefix

        msg = '\t'.join(str(x) for x in fields)
        return record_prefix + msg + '\n'


    def _record_prerendered(self, msg):
        """
        Record a pre-rendered msg into the status logs. The only
        change this makes to the message is to add on the local
        indentation. Should not be called outside of server_job.*
        classes. Unlike _record, this does not write the message
        to standard output.
        """
        lines = []
        status_file = self.get_status_log_path()
        status_log = open(status_file, 'a')
        for line in msg.splitlines():
            line = self.record_prefix + line + '\n'
            lines.append(line)
            status_log.write(line)
        status_log.close()
        self.__parse_status(lines)


    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
              module_name: The string module name.
              names: A limiting list of names to import from module_name.  If
                  empty (the default), all names are imported from the module
                  similar to a "from foo.bar import *" statement.
            Raises:
              error.AutoservError: When a name being imported would clobber
                  a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with fewer surprises on '*'
                        # imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrence of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing to be part of a public API.
        # -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self


    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        execfile(code_file, namespace, namespace)


    def _record(self, status_code, subdir, operation, status='',
                epoch_time=None, optional_fields=None):
        """
        Actual function for recording a single line into the status
        logs. Should never be called directly, only by job.record, as
        calling it directly would bypass the console monitor logging.
        """

        msg = self._render_record(status_code, subdir, operation, status,
                                  epoch_time, optional_fields=optional_fields)

        status_file = self.get_status_log_path()
        sys.stdout.write(msg)
        if status_file:
            open(status_file, "a").write(msg)
        if subdir:
            sub_status_file = self.get_status_log_path(subdir)
            open(sub_status_file, "a").write(msg)
        self.__parse_status(msg.splitlines())


    def __parse_status(self, new_lines):
        if not self.using_parser:
            return
        new_tests = self.parser.process_lines(new_lines)
        for test in new_tests:
            self.__insert_test(test)


    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues."""
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            msg = ("WARNING: An unexpected error occurred while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg


site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)

class server_job(site_server_job, base_server_job):
    pass


class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # a map of warning types to a list of disabled time intervals
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occurred and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occurred."""
        disabled_intervals = self.disabled_warnings.get(warning_type, [])
        for start, end in disabled_intervals:
            if timestamp >= start and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        if not intervals or intervals[-1][1] is not None:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            intervals[-1] = (intervals[-1][0], int(current_time_func()))
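

# Illustrative usage sketch, not present in the original source; the console
# monitors normally reach warning_manager via job.disable_warnings() and
# job.enable_warnings():
#
#     manager = warning_manager()
#     manager.disable_warnings("NETWORK", current_time_func=lambda: 100)
#     manager.is_valid(150, "NETWORK")   # False: inside a disabled interval
#     manager.enable_warnings("NETWORK", current_time_func=lambda: 200)
#     manager.is_valid(250, "NETWORK")   # True: after the interval closes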