blob: ba5f0a2df006b87f81c4a7ec9abd954db7bc412d [file] [log] [blame]
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00004"""
5The main job wrapper for the server side.
6
7This is the core infrastructure. Derived from the client side job.py
8
9Copyright Martin J. Bligh, Andy Whitcroft 2007
10"""
11
Eric Li861b2d52011-02-04 14:50:35 -080012import getpass, os, sys, re, stat, tempfile, time, select, subprocess, platform
Paul Pendleburye4afc772011-04-26 11:20:15 -070013import multiprocessing
mblighfc3da5b2010-01-06 18:37:22 +000014import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000015from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000016from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000017from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000018from autotest_lib.client.common_lib import logging_manager
Paul Pendleburyf807c182011-04-05 11:24:34 -070019from autotest_lib.server import test, subcommand, profilers, server_job_utils
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -070020from autotest_lib.server import gtest_runner
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Filenames used within a job's results/control-file directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Prebuilt control segments executed around the user-supplied control file
# (client wrapping, crash collection, install and cleanup phases).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

# Control segments used by the standalone verify/repair entry points.
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000043
44
mbligh062ed152009-01-13 00:57:14 +000045# by default provide a stub that generates no site data
46def _get_site_job_data_dummy(job):
47 return {}
48
49
# Load up site-specific code for generating site-specific job data; falls
# back to the dummy above when no site module is installed.
get_site_job_data = utils.import_site_function(__file__,
        "autotest_lib.server.site_server_job", "get_site_job_data",
        _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000054
55
class status_indenter(base_job.status_indenter):
    """Integer-backed implementation of the status indenter interface."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Move one indentation level deeper."""
        self._indent = self._indent + 1


    def decrement(self):
        """Move one indentation level shallower."""
        self._indent = self._indent - 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, indenter, saved_level):
                self._indenter = indenter
                self._saved_level = saved_level
            def restore(self):
                # roll the indenter back to the level captured at creation
                self._indenter._indent = self._saved_level
        return context(self, self._indent)
84
85
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Dispatch to the real hook (_hook) unless already inside it.

        The guard makes no attempt at thread safety; it exists purely to
        break the job.record -> _hook -> job.record -> ... recursion.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        # drain the warning loggers, recording each warning as a WARN entry
        pending = []
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                    'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        pending.append(entry)
        # echo rendered versions of all the status logs at INFO level and
        # feed each rendered line to the continuous parser
        for log_entry in pending:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000130
131
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Version of the status log format this job writes and parses.
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000147
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='',
                 group_name='', tag='',
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames of machines the job runs on.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username. [root]
        @param ssh_port: The SSH port number. [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler. [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir)

        # NOTE(review): 'path' appears to be unused below -- confirm before
        # removing.
        path = os.path.dirname(__file__)
        self.control = control
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename

        # take over stdout/stderr so all output lands in the job's logs
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # continuous parsing only works for single-machine jobs
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
241
mbligh0d0f67d2009-11-06 03:15:03 +0000242
243 @classmethod
244 def _find_base_directories(cls):
245 """
246 Determine locations of autodir, clientdir and serverdir. Assumes
247 that this file is located within serverdir and uses __file__ along
248 with relative paths to resolve the location.
249 """
250 serverdir = os.path.abspath(os.path.dirname(__file__))
251 autodir = os.path.normpath(os.path.join(serverdir, '..'))
252 clientdir = os.path.join(autodir, 'client')
253 return autodir, clientdir, serverdir
254
255
256 def _find_resultdir(self, resultdir):
257 """
258 Determine the location of resultdir. For server jobs we expect one to
259 always be explicitly passed in to __init__, so just return that.
260 """
261 if resultdir:
262 return os.path.normpath(resultdir)
263 else:
264 return None
265
jadmanski550fdc22008-11-20 16:32:08 +0000266
jadmanski2a89dac2010-06-11 14:32:58 +0000267 def _get_status_logger(self):
268 """Return a reference to the status logger."""
269 return self._logger
270
271
jadmanskie432dd22009-01-30 15:04:51 +0000272 @staticmethod
273 def _load_control_file(path):
274 f = open(path)
275 try:
276 control_file = f.read()
277 finally:
278 f.close()
279 return re.sub('\r', '', control_file)
280
281
jadmanski550fdc22008-11-20 16:32:08 +0000282 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000283 """
284 Register some hooks into the subcommand modules that allow us
285 to properly clean up self.hosts created in forked subprocesses.
286 """
jadmanski550fdc22008-11-20 16:32:08 +0000287 def on_fork(cmd):
288 self._existing_hosts_on_fork = set(self.hosts)
289 def on_join(cmd):
290 new_hosts = self.hosts - self._existing_hosts_on_fork
291 for host in new_hosts:
292 host.close()
293 subcommand.subcommand.register_fork_hook(on_fork)
294 subcommand.subcommand.register_join_hook(on_join)
295
jadmanski10646442008-08-13 14:05:21 +0000296
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log (0 = unbuffered)
        parse_log = os.path.join(self.resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: just attach the existing db indices
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
323
324
325 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000326 """
327 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000328 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000329 remaining test results to the results db)
330 """
mbligh0d0f67d2009-11-06 03:15:03 +0000331 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000332 return
333 final_tests = self.parser.end()
334 for test in final_tests:
335 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000336 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000337
338
339 def verify(self):
340 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000341 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000342 if self.resultdir:
343 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000344 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000345 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000346 'ssh_user' : self._ssh_user,
347 'ssh_port' : self._ssh_port,
348 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000349 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000350 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000351 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000352 self.record('ABORT', None, None, msg)
353 raise
354
355
356 def repair(self, host_protection):
357 if not self.machines:
358 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000359 if self.resultdir:
360 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000361 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000362 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
363 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000364 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000365
mbligh0931b0a2009-04-08 17:44:48 +0000366 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000367
368
369 def precheck(self):
370 """
371 perform any additional checks in derived classes.
372 """
373 pass
374
375
376 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000377 """
378 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000379 """
380 pass
381
382
383 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000384 """
385 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000386 """
387 pass
388
389
390 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000391 """
392 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000393 """
394 return False
395
396
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: A callable taking a single machine name.
        @param machines: The list of machines the call will run against.
        @param log: Whether per-machine logging/keyvals are wanted.
        @return: The (possibly wrapped) callable for parallel_simple.
        """
        # we fork whenever there is more than one machine, or when the
        # machine list differs from the job's own single machine
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # per-machine continuous parsing: each forked subprocess
                # gets its own parse tag, execution context and keyvals
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # no parsing, but still give each machine its own
                # execution context and result keyvals
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # single machine, no forking: call the function directly
            wrapper = function
        return wrapper
424
425
426 def parallel_simple(self, function, machines, log=True, timeout=None,
427 return_results=False):
428 """
429 Run 'function' using parallel_simple, with an extra wrapper to handle
430 the necessary setup for continuous parsing, if possible. If continuous
431 parsing is already properly initialized then this should just work.
432
433 @param function: A callable to run in parallel given each machine.
434 @param machines: A list of machine names to be passed one per subcommand
435 invocation of function.
436 @param log: If True, output will be written to output in a subdirectory
437 named after each machine.
438 @param timeout: Seconds after which the function call should timeout.
439 @param return_results: If True instead of an AutoServError being raised
440 on any error a list of the results|exceptions from the function
441 called on each arg is returned. [default: False]
442
443 @raises error.AutotestError: If any of the functions failed.
444 """
445 wrapper = self._make_parallel_wrapper(function, machines, log)
446 return subcommand.parallel_simple(wrapper, machines,
447 log=log, timeout=timeout,
448 return_results=return_results)
449
450
451 def parallel_on_machines(self, function, machines, timeout=None):
452 """
showardcd5fac42009-07-06 20:19:43 +0000453 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000454 @param machines: A list of machines to call function(machine) on.
455 @param timeout: Seconds after which the function call should timeout.
456
457 @returns A list of machines on which function(machine) returned
458 without raising an exception.
459 """
showardcd5fac42009-07-06 20:19:43 +0000460 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000461 return_results=True)
462 success_machines = []
463 for result, machine in itertools.izip(results, machines):
464 if not isinstance(result, Exception):
465 success_machines.append(machine)
466 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000467
468
    def distribute_across_machines(self, tests, machines,
                                   continuous_parsing=False):
        """Run each test in tests once using machines.

        Instead of running each test on each machine like parallel_on_machines,
        run each test once across all machines. Put another way, the total
        number of tests run by parallel_on_machines is len(tests) *
        len(machines). The number of tests run by distribute_across_machines is
        len(tests).

        Args:
            tests: List of tests to run.
            machines: List of machines to use.
            continuous_parsing: Bool, if true parse job while running.
        """
        # The Queue is thread safe, but since a machine may have to search
        # through the queue to find a valid test the lock provides exclusive
        # queue access for more than just the get call.
        test_queue = multiprocessing.JoinableQueue()
        test_queue_lock = multiprocessing.Lock()

        # one worker process per machine, all consuming the shared queue
        machine_workers = [server_job_utils.machine_worker(self,
                                                           machine,
                                                           self.resultdir,
                                                           test_queue,
                                                           test_queue_lock,
                                                           continuous_parsing)
                           for machine in machines]

        # To (potentially) speed up searching for valid tests create a list of
        # unique attribute sets present in the machines for this job. If sets
        # were hashable we could just use a dictionary for fast verification.
        # This at least reduces the search space from the number of machines to
        # the number of unique machines.
        unique_machine_attributes = []
        for mw in machine_workers:
            if not mw.attribute_set in unique_machine_attributes:
                unique_machine_attributes.append(mw.attribute_set)

        # Only queue tests which are valid on at least one machine. Record
        # skipped tests in the status.log file using record_skipped_test().
        for test_entry in tests:
            ti = server_job_utils.test_item(*test_entry)
            machine_found = False
            for ma in unique_machine_attributes:
                if ti.validate(ma):
                    test_queue.put(ti)
                    machine_found = True
                    break
            if not machine_found:
                self.record_skipped_test(ti)

        # Run valid tests and wait for completion.
        for worker in machine_workers:
            worker.start()
        test_queue.join()
525
526
527 def record_skipped_test(self, skipped_test, message=None):
528 """Insert a failure record into status.log for this test."""
529 msg = message
530 if msg is None:
531 msg = 'No valid machines found for test %s.' % skipped_test
532 logging.info(msg)
533 self.record('START', None, skipped_test.test_name)
534 self.record('INFO', None, skipped_test.test_name, msg)
535 self.record('END TEST_NA', None, skipped_test.test_name, msg)
536
537
    # sentinel: callers pass this to request a temporary control-file dir
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, only_collect_crashinfo=False):
        """Execute the job's control file on its machines.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install segment first.
        @param install_after: If True, run the install segment afterwards.
        @param collect_crashdumps: If True, collect crashdumps when done.
        @param namespace: Extra names exposed to the control file. It is
                copied before use, so the caller's dict is never mutated
                (which also makes the mutable {} default safe here).
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Directory to write control files into;
                _USE_TEMP_DIR (or falsy) selects a temporary directory.
        @param only_collect_crashinfo: If True, skip running the control
                file and only collect crash information.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['gtest_runner'] = gtest_runner.gtest_runner()
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client job: write the user control file as the client
                    # control and run the canned client wrapper on the server
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                        'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000653
654
655 def run_test(self, url, *args, **dargs):
mbligh2b92b862008-11-22 13:25:32 +0000656 """
657 Summon a test object and run it.
jadmanski10646442008-08-13 14:05:21 +0000658
659 tag
660 tag to add to testname
661 url
662 url of the test to run
663 """
mblighfc3da5b2010-01-06 18:37:22 +0000664 group, testname = self.pkgmgr.get_package_name(url, 'test')
665 testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
666 outputdir = self._make_test_outputdir(subdir)
jadmanski10646442008-08-13 14:05:21 +0000667
668 def group_func():
669 try:
670 test.runtest(self, url, tag, args, dargs)
671 except error.TestBaseException, e:
672 self.record(e.exit_status, subdir, testname, str(e))
673 raise
674 except Exception, e:
675 info = str(e) + "\n" + traceback.format_exc()
676 self.record('FAIL', subdir, testname, info)
677 raise
678 else:
mbligh2b92b862008-11-22 13:25:32 +0000679 self.record('GOOD', subdir, testname, 'completed successfully')
jadmanskide292df2008-08-26 20:51:14 +0000680
681 result, exc_info = self._run_group(testname, subdir, group_func)
682 if exc_info and isinstance(exc_info[1], error.TestBaseException):
683 return False
684 elif exc_info:
685 raise exc_info[0], exc_info[1], exc_info[2]
686 else:
687 return True
jadmanski10646442008-08-13 14:05:21 +0000688
689
690 def _run_group(self, name, subdir, function, *args, **dargs):
691 """\
692 Underlying method for running something inside of a group.
693 """
jadmanskide292df2008-08-26 20:51:14 +0000694 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000695 try:
696 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000697 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000698 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000699 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000700 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000701 except Exception, e:
702 err_msg = str(e) + '\n'
703 err_msg += traceback.format_exc()
704 self.record('END ABORT', subdir, name, err_msg)
705 raise error.JobError(name + ' failed\n' + traceback.format_exc())
706 else:
707 self.record('END GOOD', subdir, name)
708
jadmanskide292df2008-08-26 20:51:14 +0000709 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000710
711
712 def run_group(self, function, *args, **dargs):
713 """\
714 function:
715 subroutine to run
716 *args:
717 arguments for the function
718 """
719
720 name = function.__name__
721
722 # Allow the tag for the group to be specified.
723 tag = dargs.pop('tag', None)
724 if tag:
725 name = tag
726
jadmanskide292df2008-08-26 20:51:14 +0000727 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000728
729
730 def run_reboot(self, reboot_func, get_kernel_func):
731 """\
732 A specialization of run_group meant specifically for handling
733 a reboot. Includes support for capturing the kernel version
734 after the reboot.
735
736 reboot_func: a function that carries out the reboot
737
738 get_kernel_func: a function that returns a string
739 representing the kernel version.
740 """
jadmanski10646442008-08-13 14:05:21 +0000741 try:
742 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000743 reboot_func()
744 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000745 err_msg = str(e) + '\n' + traceback.format_exc()
746 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000747 raise
jadmanski10646442008-08-13 14:05:21 +0000748 else:
749 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000750 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700751 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000752
753
jadmanskie432dd22009-01-30 15:04:51 +0000754 def run_control(self, path):
755 """Execute a control file found at path (relative to the autotest
756 path). Intended for executing a control file within a control file,
757 not for running the top-level job control file."""
758 path = os.path.join(self.autodir, path)
759 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000760 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000761
762
jadmanskic09fc152008-10-15 17:56:59 +0000763 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000764 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000765 on_every_test)
766
767
768 def add_sysinfo_logfile(self, file, on_every_test=False):
769 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
770
771
772 def _add_sysinfo_loggable(self, loggable, on_every_test):
773 if on_every_test:
774 self.sysinfo.test_loggables.add(loggable)
775 else:
776 self.sysinfo.boot_loggables.add(loggable)
777
778
jadmanski10646442008-08-13 14:05:21 +0000779 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000780 """Poll all the warning loggers and extract any new warnings that have
781 been logged. If the warnings belong to a category that is currently
782 disabled, this method will discard them and they will no longer be
783 retrievable.
784
785 Returns a list of (timestamp, message) tuples, where timestamp is an
786 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000787 warnings = []
788 while True:
789 # pull in a line of output from every logger that has
790 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000791 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000792 closed_loggers = set()
793 for logger in loggers:
794 line = logger.readline()
795 # record any broken pipes (aka line == empty)
796 if len(line) == 0:
797 closed_loggers.add(logger)
798 continue
jadmanskif37df842009-02-11 00:03:26 +0000799 # parse out the warning
800 timestamp, msgtype, msg = line.split('\t', 2)
801 timestamp = int(timestamp)
802 # if the warning is valid, add it to the results
803 if self.warning_manager.is_valid(timestamp, msgtype):
804 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000805
806 # stop listening to loggers that are closed
807 self.warning_loggers -= closed_loggers
808
809 # stop if none of the loggers have any output left
810 if not loggers:
811 break
812
813 # sort into timestamp order
814 warnings.sort()
815 return warnings
816
817
showardcc929362010-01-25 21:20:41 +0000818 def _unique_subdirectory(self, base_subdirectory_name):
819 """Compute a unique results subdirectory based on the given name.
820
821 Appends base_subdirectory_name with a number as necessary to find a
822 directory name that doesn't already exist.
823 """
824 subdirectory = base_subdirectory_name
825 counter = 1
826 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
827 subdirectory = base_subdirectory_name + '.' + str(counter)
828 counter += 1
829 return subdirectory
830
831
jadmanski52053632010-06-11 21:08:10 +0000832 def get_record_context(self):
833 """Returns an object representing the current job.record context.
834
835 The object returned is an opaque object with a 0-arg restore method
836 which can be called to restore the job.record context (i.e. indentation)
837 to the current level. The intention is that it should be used when
838 something external which generate job.record calls (e.g. an autotest
839 client) can fail catastrophically and the server job record state
840 needs to be reset to its original "known good" state.
841
842 @return: A context object with a 0-arg restore() method."""
843 return self._indenter.get_context()
844
845
showardcc929362010-01-25 21:20:41 +0000846 def record_summary(self, status_code, test_name, reason='', attributes=None,
847 distinguishing_attributes=(), child_test_ids=None):
848 """Record a summary test result.
849
850 @param status_code: status code string, see
851 common_lib.log.is_valid_status()
852 @param test_name: name of the test
853 @param reason: (optional) string providing detailed reason for test
854 outcome
855 @param attributes: (optional) dict of string keyvals to associate with
856 this result
857 @param distinguishing_attributes: (optional) list of attribute names
858 that should be used to distinguish identically-named test
859 results. These attributes should be present in the attributes
860 parameter. This is used to generate user-friendly subdirectory
861 names.
862 @param child_test_ids: (optional) list of test indices for test results
863 used in generating this result.
864 """
865 subdirectory_name_parts = [test_name]
866 for attribute in distinguishing_attributes:
867 assert attributes
868 assert attribute in attributes, '%s not in %s' % (attribute,
869 attributes)
870 subdirectory_name_parts.append(attributes[attribute])
871 base_subdirectory_name = '.'.join(subdirectory_name_parts)
872
873 subdirectory = self._unique_subdirectory(base_subdirectory_name)
874 subdirectory_path = os.path.join(self.resultdir, subdirectory)
875 os.mkdir(subdirectory_path)
876
877 self.record(status_code, subdirectory, test_name,
878 status=reason, optional_fields={'is_summary': True})
879
880 if attributes:
881 utils.write_keyval(subdirectory_path, attributes)
882
883 if child_test_ids:
884 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
885 summary_data = {'child_test_ids': ids_string}
886 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
887 summary_data)
888
889
jadmanski16a7ff72009-04-01 18:19:53 +0000890 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000891 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000892 self.record("INFO", None, None,
893 "disabling %s warnings" % warning_type,
894 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000895
896
jadmanski16a7ff72009-04-01 18:19:53 +0000897 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000898 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000899 self.record("INFO", None, None,
900 "enabling %s warnings" % warning_type,
901 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000902
903
jadmanski779bd292009-03-19 17:33:33 +0000904 def get_status_log_path(self, subdir=None):
905 """Return the path to the job status log.
906
907 @param subdir - Optional paramter indicating that you want the path
908 to a subdirectory status log.
909
910 @returns The path where the status log should be.
911 """
mbligh210bae62009-04-01 18:33:13 +0000912 if self.resultdir:
913 if subdir:
914 return os.path.join(self.resultdir, subdir, "status.log")
915 else:
916 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000917 else:
mbligh210bae62009-04-01 18:33:13 +0000918 return None
jadmanski779bd292009-03-19 17:33:33 +0000919
920
jadmanski6bb32d72009-03-19 20:25:24 +0000921 def _update_uncollected_logs_list(self, update_func):
922 """Updates the uncollected logs list in a multi-process safe manner.
923
924 @param update_func - a function that updates the list of uncollected
925 logs. Should take one parameter, the list to be updated.
926 """
mbligh0d0f67d2009-11-06 03:15:03 +0000927 if self._uncollected_log_file:
928 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000929 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000930 try:
931 uncollected_logs = pickle.load(log_file)
932 update_func(uncollected_logs)
933 log_file.seek(0)
934 log_file.truncate()
935 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000936 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000937 finally:
938 fcntl.flock(log_file, fcntl.LOCK_UN)
939 log_file.close()
940
941
942 def add_client_log(self, hostname, remote_path, local_path):
943 """Adds a new set of client logs to the list of uncollected logs,
944 to allow for future log recovery.
945
946 @param host - the hostname of the machine holding the logs
947 @param remote_path - the directory on the remote machine holding logs
948 @param local_path - the local directory to copy the logs into
949 """
950 def update_func(logs_list):
951 logs_list.append((hostname, remote_path, local_path))
952 self._update_uncollected_logs_list(update_func)
953
954
955 def remove_client_log(self, hostname, remote_path, local_path):
956 """Removes a set of client logs from the list of uncollected logs,
957 to allow for future log recovery.
958
959 @param host - the hostname of the machine holding the logs
960 @param remote_path - the directory on the remote machine holding logs
961 @param local_path - the local directory to copy the logs into
962 """
963 def update_func(logs_list):
964 logs_list.remove((hostname, remote_path, local_path))
965 self._update_uncollected_logs_list(update_func)
966
967
mbligh0d0f67d2009-11-06 03:15:03 +0000968 def get_client_logs(self):
969 """Retrieves the list of uncollected logs, if it exists.
970
971 @returns A list of (host, remote_path, local_path) tuples. Returns
972 an empty list if no uncollected logs file exists.
973 """
974 log_exists = (self._uncollected_log_file and
975 os.path.exists(self._uncollected_log_file))
976 if log_exists:
977 return pickle.load(open(self._uncollected_log_file))
978 else:
979 return []
980
981
mbligh084bc172008-10-18 14:02:45 +0000982 def _fill_server_control_namespace(self, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000983 """
984 Prepare a namespace to be used when executing server control files.
mbligh084bc172008-10-18 14:02:45 +0000985
986 This sets up the control file API by importing modules and making them
987 available under the appropriate names within namespace.
988
989 For use by _execute_code().
990
991 Args:
992 namespace: The namespace dictionary to fill in.
993 protect: Boolean. If True (the default) any operation that would
994 clobber an existing entry in namespace will cause an error.
995 Raises:
996 error.AutoservError: When a name would be clobbered by import.
997 """
998 def _import_names(module_name, names=()):
mbligh2b92b862008-11-22 13:25:32 +0000999 """
1000 Import a module and assign named attributes into namespace.
mbligh084bc172008-10-18 14:02:45 +00001001
1002 Args:
1003 module_name: The string module name.
1004 names: A limiting list of names to import from module_name. If
1005 empty (the default), all names are imported from the module
1006 similar to a "from foo.bar import *" statement.
1007 Raises:
1008 error.AutoservError: When a name being imported would clobber
1009 a name already in namespace.
1010 """
1011 module = __import__(module_name, {}, {}, names)
1012
1013 # No names supplied? Import * from the lowest level module.
1014 # (Ugh, why do I have to implement this part myself?)
1015 if not names:
1016 for submodule_name in module_name.split('.')[1:]:
1017 module = getattr(module, submodule_name)
1018 if hasattr(module, '__all__'):
1019 names = getattr(module, '__all__')
1020 else:
1021 names = dir(module)
1022
1023 # Install each name into namespace, checking to make sure it
1024 # doesn't override anything that already exists.
1025 for name in names:
1026 # Check for conflicts to help prevent future problems.
1027 if name in namespace and protect:
1028 if namespace[name] is not getattr(module, name):
1029 raise error.AutoservError('importing name '
1030 '%s from %s %r would override %r' %
1031 (name, module_name, getattr(module, name),
1032 namespace[name]))
1033 else:
1034 # Encourage cleanliness and the use of __all__ for a
1035 # more concrete API with less surprises on '*' imports.
1036 warnings.warn('%s (%r) being imported from %s for use '
1037 'in server control files is not the '
1038 'first occurrance of that import.' %
1039 (name, namespace[name], module_name))
1040
1041 namespace[name] = getattr(module, name)
1042
1043
1044 # This is the equivalent of prepending a bunch of import statements to
1045 # the front of the control script.
mbligha2b07dd2009-06-22 18:26:13 +00001046 namespace.update(os=os, sys=sys, logging=logging)
mbligh084bc172008-10-18 14:02:45 +00001047 _import_names('autotest_lib.server',
1048 ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
1049 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
1050 _import_names('autotest_lib.server.subcommand',
1051 ('parallel', 'parallel_simple', 'subcommand'))
1052 _import_names('autotest_lib.server.utils',
1053 ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
1054 _import_names('autotest_lib.client.common_lib.error')
1055 _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))
1056
1057 # Inject ourself as the job object into other classes within the API.
1058 # (Yuck, this injection is a gross thing be part of a public API. -gps)
1059 #
1060 # XXX Base & SiteAutotest do not appear to use .job. Who does?
1061 namespace['autotest'].Autotest.job = self
1062 # server.hosts.base_classes.Host uses .job.
1063 namespace['hosts'].Host.job = self
Eric Li10222b82010-11-24 09:33:15 -08001064 namespace['hosts'].factory.ssh_user = self._ssh_user
1065 namespace['hosts'].factory.ssh_port = self._ssh_port
1066 namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001067
1068
1069 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001070 """
1071 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001072
1073 Unless protect_namespace is explicitly set to False, the dict will not
1074 be modified.
1075
1076 Args:
1077 code_file: The filename of the control file to execute.
1078 namespace: A dict containing names to make available during execution.
1079 protect: Boolean. If True (the default) a copy of the namespace dict
1080 is used during execution to prevent the code from modifying its
1081 contents outside of this function. If False the raw dict is
1082 passed in and modifications will be allowed.
1083 """
1084 if protect:
1085 namespace = namespace.copy()
1086 self._fill_server_control_namespace(namespace, protect=protect)
1087 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001088 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001089 machines_text = '\n'.join(self.machines) + '\n'
1090 # Only rewrite the file if it does not match our machine list.
1091 try:
1092 machines_f = open(MACHINES_FILENAME, 'r')
1093 existing_machines_text = machines_f.read()
1094 machines_f.close()
1095 except EnvironmentError:
1096 existing_machines_text = None
1097 if machines_text != existing_machines_text:
1098 utils.open_write_close(MACHINES_FILENAME, machines_text)
1099 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001100
1101
jadmanskie29d0e42010-06-17 16:06:52 +00001102 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001103 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001104 return
jadmanskie29d0e42010-06-17 16:06:52 +00001105 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001106 for test in new_tests:
1107 self.__insert_test(test)
1108
1109
1110 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001111 """
1112 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001113 database. This method will not raise an exception, even if an
1114 error occurs during the insert, to avoid failing a test
1115 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001116 self.num_tests_run += 1
1117 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1118 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001119 try:
1120 self.results_db.insert_test(self.job_model, test)
1121 except Exception:
1122 msg = ("WARNING: An unexpected error occured while "
1123 "inserting test results into the database. "
1124 "Ignoring error.\n" + traceback.format_exc())
1125 print >> sys.stderr, msg
1126
mblighcaa62c22008-04-07 21:51:17 +00001127
mblighfc3da5b2010-01-06 18:37:22 +00001128 def preprocess_client_state(self):
1129 """
1130 Produce a state file for initializing the state of a client job.
1131
1132 Creates a new client state file with all the current server state, as
1133 well as some pre-set client state.
1134
1135 @returns The path of the file the state was written into.
1136 """
1137 # initialize the sysinfo state
1138 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1139
1140 # dump the state out to a tempfile
1141 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1142 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001143
1144 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001145 self._state.write_to_file(file_path)
1146 return file_path
1147
1148
1149 def postprocess_client_state(self, state_path):
1150 """
1151 Update the state of this job with the state from a client job.
1152
1153 Updates the state of the server side of a job with the final state
1154 of a client job that was run. Updates the non-client-specific state,
1155 pulls in some specific bits from the client-specific state, and then
1156 discards the rest. Removes the state file afterwards
1157
1158 @param state_file A path to the state file from the client.
1159 """
1160 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001161 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001162 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001163 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001164 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001165 # ignore file-not-found errors
1166 if e.errno != errno.ENOENT:
1167 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001168 else:
1169 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001170
1171 # update the sysinfo state
1172 if self._state.has('client', 'sysinfo'):
1173 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1174
1175 # drop all the client-specific state
1176 self._state.discard_namespace('client')
1177
1178
mbligh0a883702010-04-21 01:58:34 +00001179 def clear_all_known_hosts(self):
1180 """Clears known hosts files for all AbstractSSHHosts."""
1181 for host in self.hosts:
1182 if isinstance(host, abstract_ssh.AbstractSSHHost):
1183 host.clear_known_hosts()
1184
1185
mbligha7007722009-01-13 00:37:11 +00001186site_server_job = utils.import_site_class(
1187 __file__, "autotest_lib.server.site_server_job", "site_server_job",
1188 base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001189
mbligh0a8c3322009-04-28 18:32:19 +00001190class server_job(site_server_job):
jadmanski0afbb632008-06-06 21:10:57 +00001191 pass
jadmanskif37df842009-02-11 00:03:26 +00001192
1193
class warning_manager(object):
    """Controls warning logs by tracking, per warning type, the time
    intervals during which that type was disabled.  Warnings that fall
    inside a disabled interval are reported as invalid."""
    def __init__(self):
        # maps warning type -> list of (start, end) disabled intervals;
        # end is None while the interval is still open
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning.  A warning is "invalid" if its type was disabled
        at the moment the warning occured."""
        for begin, end in self.disabled_warnings.get(warning_type, []):
            if begin <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        last_open = intervals and intervals[-1][1] is None
        if not last_open:
            # open a new disabled interval starting now
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # close the currently-open disabled interval
            begin = intervals[-1][0]
            intervals[-1] = (begin, int(current_time_func()))