blob: e3872a4d8b5ed1214dd7d726bff80adb17dd66b2 [file] [log] [blame]
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00004"""
5The main job wrapper for the server side.
6
7This is the core infrastructure. Derived from the client side job.py
8
9Copyright Martin J. Bligh, Andy Whitcroft 2007
10"""
11
Eric Li861b2d52011-02-04 14:50:35 -080012import getpass, os, sys, re, stat, tempfile, time, select, subprocess, platform
Paul Pendleburye4afc772011-04-26 11:20:15 -070013import multiprocessing
mblighfc3da5b2010-01-06 18:37:22 +000014import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000015from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000016from autotest_lib.client.common_lib import base_job
mbligh09108442008-10-15 16:27:38 +000017from autotest_lib.client.common_lib import error, log, utils, packages
showard75cdfee2009-06-10 17:40:41 +000018from autotest_lib.client.common_lib import logging_manager
Paul Pendleburyf807c182011-04-05 11:24:34 -070019from autotest_lib.server import test, subcommand, profilers, server_job_utils
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -070020from autotest_lib.server import gtest_runner
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Filenames used inside a job's control/results directories.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved absolute paths to the standard control segment files.
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')

VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000043
44
mbligh062ed152009-01-13 00:57:14 +000045# by default provide a stub that generates no site data
46def _get_site_job_data_dummy(job):
47 return {}
48
49
# Load the site-specific hook for generating extra job data, falling back
# to the no-op stub when no site implementation is installed.
get_site_job_data = utils.import_site_function(
        __file__, "autotest_lib.server.site_server_job", "get_site_job_data",
        _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000054
55
class status_indenter(base_job.status_indenter):
    """Status indenter backed by a plain integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Go one level deeper."""
        self._indent = self._indent + 1


    def decrement(self):
        """Come back up one level."""
        self._indent = self._indent - 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, indenter, level):
                self._indenter = indenter
                self._indent = level
            def restore(self):
                # put the saved level back on the owning indenter
                self._indenter._indent = self._indent
        return context(self, self._indent)
84
85
class server_job_record_hook(object):
    """Record hook installed on a server job's status logger.

    Whenever a status entry is recorded, this hook injects WARN messages
    gathered from the job's warning loggers and echoes rendered log lines
    at INFO level. It is a class (rather than a plain function) so it can
    keep a flag that blocks infinite recursion, since the hook itself may
    call job.record to log the WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Run the real hook (_hook) unless we are already inside it.

        No attempt is made at thread safety; this only guards against the
        job.record -> _hook -> job.record -> _hook ... recursion chain.
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook body; this part may safely call job.record."""
        to_echo = []
        # drain freshly produced warnings and record each one as WARN
        for timestamp, msg in job._read_warnings():
            warn_entry = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            to_echo.append(warn_entry)
            job.record_entry(warn_entry)
        # echo the warnings plus the new entry to the INFO log, feeding
        # each rendered line through the status parser as well
        to_echo.append(entry)
        for log_entry in to_echo:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000130
131
mbligh0d0f67d2009-11-06 03:15:03 +0000132class base_server_job(base_job.base_job):
133 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000134
mbligh0d0f67d2009-11-06 03:15:03 +0000135 Optional properties provided by this implementation:
136 serverdir
137 conmuxdir
138
139 num_tests_run
140 num_tests_failed
141
142 warning_manager
143 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000144 """
145
mbligh0d0f67d2009-11-06 03:15:03 +0000146 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000147
148 def __init__(self, control, args, resultdir, label, user, machines,
149 client=False, parse_job='',
mbligh374f3412009-05-13 21:29:45 +0000150 ssh_user='root', ssh_port=22, ssh_pass='',
mblighe0cbc912010-03-11 18:03:07 +0000151 group_name='', tag='',
152 control_filename=SERVER_CONTROL_FILENAME):
jadmanski10646442008-08-13 14:05:21 +0000153 """
mbligh374f3412009-05-13 21:29:45 +0000154 Create a server side job object.
mblighb5dac432008-11-27 00:38:44 +0000155
mblighe7d9c602009-07-02 19:02:33 +0000156 @param control: The pathname of the control file.
157 @param args: Passed to the control file.
158 @param resultdir: Where to throw the results.
159 @param label: Description of the job.
160 @param user: Username for the job (email address).
161 @param client: True if this is a client-side control file.
162 @param parse_job: string, if supplied it is the job execution tag that
163 the results will be passed through to the TKO parser with.
164 @param ssh_user: The SSH username. [root]
165 @param ssh_port: The SSH port number. [22]
166 @param ssh_pass: The SSH passphrase, if needed.
167 @param group_name: If supplied, this will be written out as
mbligh374f3412009-05-13 21:29:45 +0000168 host_group_name in the keyvals file for the parser.
mblighe7d9c602009-07-02 19:02:33 +0000169 @param tag: The job execution tag from the scheduler. [optional]
mblighe0cbc912010-03-11 18:03:07 +0000170 @param control_filename: The filename where the server control file
171 should be written in the results directory.
jadmanski10646442008-08-13 14:05:21 +0000172 """
mbligh0d0f67d2009-11-06 03:15:03 +0000173 super(base_server_job, self).__init__(resultdir=resultdir)
mbligha788dc42009-03-26 21:10:16 +0000174
mbligh0d0f67d2009-11-06 03:15:03 +0000175 path = os.path.dirname(__file__)
176 self.control = control
177 self._uncollected_log_file = os.path.join(self.resultdir,
178 'uncollected_logs')
179 debugdir = os.path.join(self.resultdir, 'debug')
180 if not os.path.exists(debugdir):
181 os.mkdir(debugdir)
182
183 if user:
184 self.user = user
185 else:
186 self.user = getpass.getuser()
187
jadmanski808f4b12010-04-09 22:30:31 +0000188 self.args = args
jadmanski10646442008-08-13 14:05:21 +0000189 self.machines = machines
mbligh0d0f67d2009-11-06 03:15:03 +0000190 self._client = client
jadmanski10646442008-08-13 14:05:21 +0000191 self.warning_loggers = set()
jadmanskif37df842009-02-11 00:03:26 +0000192 self.warning_manager = warning_manager()
mbligh0d0f67d2009-11-06 03:15:03 +0000193 self._ssh_user = ssh_user
194 self._ssh_port = ssh_port
195 self._ssh_pass = ssh_pass
mblighe7d9c602009-07-02 19:02:33 +0000196 self.tag = tag
mbligh09108442008-10-15 16:27:38 +0000197 self.last_boot_tag = None
jadmanski53aaf382008-11-17 16:22:31 +0000198 self.hosts = set()
mbligh0d0f67d2009-11-06 03:15:03 +0000199 self.drop_caches = False
mblighb5dac432008-11-27 00:38:44 +0000200 self.drop_caches_between_iterations = False
mblighe0cbc912010-03-11 18:03:07 +0000201 self._control_filename = control_filename
jadmanski10646442008-08-13 14:05:21 +0000202
showard75cdfee2009-06-10 17:40:41 +0000203 self.logging = logging_manager.get_logging_manager(
204 manage_stdout_and_stderr=True, redirect_fds=True)
205 subcommand.logging_manager_object = self.logging
jadmanski10646442008-08-13 14:05:21 +0000206
mbligh0d0f67d2009-11-06 03:15:03 +0000207 self.sysinfo = sysinfo.sysinfo(self.resultdir)
jadmanski043e1132008-11-19 17:10:32 +0000208 self.profilers = profilers.profilers(self)
jadmanskic09fc152008-10-15 17:56:59 +0000209
jadmanski10646442008-08-13 14:05:21 +0000210 job_data = {'label' : label, 'user' : user,
211 'hostname' : ','.join(machines),
Eric Li861b2d52011-02-04 14:50:35 -0800212 'drone' : platform.node(),
mbligh0d0f67d2009-11-06 03:15:03 +0000213 'status_version' : str(self._STATUS_VERSION),
showard170873e2009-01-07 00:22:26 +0000214 'job_started' : str(int(time.time()))}
mbligh374f3412009-05-13 21:29:45 +0000215 if group_name:
216 job_data['host_group_name'] = group_name
jadmanski10646442008-08-13 14:05:21 +0000217
mbligh0d0f67d2009-11-06 03:15:03 +0000218 # only write these keyvals out on the first job in a resultdir
219 if 'job_started' not in utils.read_keyval(self.resultdir):
220 job_data.update(get_site_job_data(self))
221 utils.write_keyval(self.resultdir, job_data)
222
223 self._parse_job = parse_job
showardcc929362010-01-25 21:20:41 +0000224 self._using_parser = (self._parse_job and len(machines) <= 1)
mbligh0d0f67d2009-11-06 03:15:03 +0000225 self.pkgmgr = packages.PackageManager(
226 self.autodir, run_function_dargs={'timeout':600})
showard21baa452008-10-21 00:08:39 +0000227 self.num_tests_run = 0
228 self.num_tests_failed = 0
229
jadmanski550fdc22008-11-20 16:32:08 +0000230 self._register_subcommand_hooks()
231
mbligh0d0f67d2009-11-06 03:15:03 +0000232 # these components aren't usable on the server
233 self.bootloader = None
234 self.harness = None
235
jadmanski2a89dac2010-06-11 14:32:58 +0000236 # set up the status logger
jadmanski52053632010-06-11 21:08:10 +0000237 self._indenter = status_indenter()
jadmanski2a89dac2010-06-11 14:32:58 +0000238 self._logger = base_job.status_logger(
jadmanski52053632010-06-11 21:08:10 +0000239 self, self._indenter, 'status.log', 'status.log',
jadmanski2a89dac2010-06-11 14:32:58 +0000240 record_hook=server_job_record_hook(self))
241
mbligh0d0f67d2009-11-06 03:15:03 +0000242
243 @classmethod
244 def _find_base_directories(cls):
245 """
246 Determine locations of autodir, clientdir and serverdir. Assumes
247 that this file is located within serverdir and uses __file__ along
248 with relative paths to resolve the location.
249 """
250 serverdir = os.path.abspath(os.path.dirname(__file__))
251 autodir = os.path.normpath(os.path.join(serverdir, '..'))
252 clientdir = os.path.join(autodir, 'client')
253 return autodir, clientdir, serverdir
254
255
256 def _find_resultdir(self, resultdir):
257 """
258 Determine the location of resultdir. For server jobs we expect one to
259 always be explicitly passed in to __init__, so just return that.
260 """
261 if resultdir:
262 return os.path.normpath(resultdir)
263 else:
264 return None
265
jadmanski550fdc22008-11-20 16:32:08 +0000266
jadmanski2a89dac2010-06-11 14:32:58 +0000267 def _get_status_logger(self):
268 """Return a reference to the status logger."""
269 return self._logger
270
271
jadmanskie432dd22009-01-30 15:04:51 +0000272 @staticmethod
273 def _load_control_file(path):
274 f = open(path)
275 try:
276 control_file = f.read()
277 finally:
278 f.close()
279 return re.sub('\r', '', control_file)
280
281
jadmanski550fdc22008-11-20 16:32:08 +0000282 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000283 """
284 Register some hooks into the subcommand modules that allow us
285 to properly clean up self.hosts created in forked subprocesses.
286 """
jadmanski550fdc22008-11-20 16:32:08 +0000287 def on_fork(cmd):
288 self._existing_hosts_on_fork = set(self.hosts)
289 def on_join(cmd):
290 new_hosts = self.hosts - self._existing_hosts_on_fork
291 for host in new_hosts:
292 host.close()
293 subcommand.subcommand.register_fork_hook(on_fork)
294 subcommand.subcommand.register_join_hook(on_join)
295
jadmanski10646442008-08-13 14:05:21 +0000296
mbligh4608b002010-01-05 18:22:35 +0000297 def init_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000298 """
mbligh4608b002010-01-05 18:22:35 +0000299 Start the continuous parsing of self.resultdir. This sets up
jadmanski10646442008-08-13 14:05:21 +0000300 the database connection and inserts the basic job object into
mbligh2b92b862008-11-22 13:25:32 +0000301 the database if necessary.
302 """
mbligh4608b002010-01-05 18:22:35 +0000303 if not self._using_parser:
304 return
jadmanski10646442008-08-13 14:05:21 +0000305 # redirect parser debugging to .parse.log
mbligh4608b002010-01-05 18:22:35 +0000306 parse_log = os.path.join(self.resultdir, '.parse.log')
jadmanski10646442008-08-13 14:05:21 +0000307 parse_log = open(parse_log, 'w', 0)
308 tko_utils.redirect_parser_debugging(parse_log)
309 # create a job model object and set up the db
310 self.results_db = tko_db.db(autocommit=True)
mbligh0d0f67d2009-11-06 03:15:03 +0000311 self.parser = status_lib.parser(self._STATUS_VERSION)
mbligh4608b002010-01-05 18:22:35 +0000312 self.job_model = self.parser.make_job(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000313 self.parser.start(self.job_model)
314 # check if a job already exists in the db and insert it if
315 # it does not
mbligh0d0f67d2009-11-06 03:15:03 +0000316 job_idx = self.results_db.find_job(self._parse_job)
jadmanski10646442008-08-13 14:05:21 +0000317 if job_idx is None:
mbligh0d0f67d2009-11-06 03:15:03 +0000318 self.results_db.insert_job(self._parse_job, self.job_model)
jadmanski10646442008-08-13 14:05:21 +0000319 else:
mbligh2b92b862008-11-22 13:25:32 +0000320 machine_idx = self.results_db.lookup_machine(self.job_model.machine)
jadmanski10646442008-08-13 14:05:21 +0000321 self.job_model.index = job_idx
322 self.job_model.machine_idx = machine_idx
323
324
325 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000326 """
327 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000328 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000329 remaining test results to the results db)
330 """
mbligh0d0f67d2009-11-06 03:15:03 +0000331 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000332 return
333 final_tests = self.parser.end()
334 for test in final_tests:
335 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000336 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000337
338
339 def verify(self):
340 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000341 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000342 if self.resultdir:
343 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000344 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000345 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000346 'ssh_user' : self._ssh_user,
347 'ssh_port' : self._ssh_port,
348 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000349 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000350 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000351 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000352 self.record('ABORT', None, None, msg)
353 raise
354
355
356 def repair(self, host_protection):
357 if not self.machines:
358 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000359 if self.resultdir:
360 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000361 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000362 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
363 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000364 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000365
mbligh0931b0a2009-04-08 17:44:48 +0000366 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000367
368
369 def precheck(self):
370 """
371 perform any additional checks in derived classes.
372 """
373 pass
374
375
376 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000377 """
378 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000379 """
380 pass
381
382
383 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000384 """
385 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000386 """
387 pass
388
389
390 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000391 """
392 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000393 """
394 return False
395
396
mbligh415dc212009-06-15 21:53:34 +0000397 def _make_parallel_wrapper(self, function, machines, log):
398 """Wrap function as appropriate for calling by parallel_simple."""
mbligh2b92b862008-11-22 13:25:32 +0000399 is_forking = not (len(machines) == 1 and self.machines == machines)
mbligh0d0f67d2009-11-06 03:15:03 +0000400 if self._parse_job and is_forking and log:
jadmanski10646442008-08-13 14:05:21 +0000401 def wrapper(machine):
mbligh0d0f67d2009-11-06 03:15:03 +0000402 self._parse_job += "/" + machine
403 self._using_parser = True
jadmanski10646442008-08-13 14:05:21 +0000404 self.machines = [machine]
mbligh0d0f67d2009-11-06 03:15:03 +0000405 self.push_execution_context(machine)
jadmanski609a5f42008-08-26 20:52:42 +0000406 os.chdir(self.resultdir)
showard2bab8f42008-11-12 18:15:22 +0000407 utils.write_keyval(self.resultdir, {"hostname": machine})
mbligh4608b002010-01-05 18:22:35 +0000408 self.init_parser()
jadmanski10646442008-08-13 14:05:21 +0000409 result = function(machine)
410 self.cleanup_parser()
411 return result
jadmanski4dd1a002008-09-05 20:27:30 +0000412 elif len(machines) > 1 and log:
jadmanski10646442008-08-13 14:05:21 +0000413 def wrapper(machine):
mbligh0d0f67d2009-11-06 03:15:03 +0000414 self.push_execution_context(machine)
jadmanski609a5f42008-08-26 20:52:42 +0000415 os.chdir(self.resultdir)
mbligh838d82d2009-03-11 17:14:31 +0000416 machine_data = {'hostname' : machine,
mbligh0d0f67d2009-11-06 03:15:03 +0000417 'status_version' : str(self._STATUS_VERSION)}
mbligh838d82d2009-03-11 17:14:31 +0000418 utils.write_keyval(self.resultdir, machine_data)
jadmanski10646442008-08-13 14:05:21 +0000419 result = function(machine)
420 return result
421 else:
422 wrapper = function
mbligh415dc212009-06-15 21:53:34 +0000423 return wrapper
424
425
426 def parallel_simple(self, function, machines, log=True, timeout=None,
427 return_results=False):
428 """
429 Run 'function' using parallel_simple, with an extra wrapper to handle
430 the necessary setup for continuous parsing, if possible. If continuous
431 parsing is already properly initialized then this should just work.
432
433 @param function: A callable to run in parallel given each machine.
434 @param machines: A list of machine names to be passed one per subcommand
435 invocation of function.
436 @param log: If True, output will be written to output in a subdirectory
437 named after each machine.
438 @param timeout: Seconds after which the function call should timeout.
439 @param return_results: If True instead of an AutoServError being raised
440 on any error a list of the results|exceptions from the function
441 called on each arg is returned. [default: False]
442
443 @raises error.AutotestError: If any of the functions failed.
444 """
445 wrapper = self._make_parallel_wrapper(function, machines, log)
446 return subcommand.parallel_simple(wrapper, machines,
447 log=log, timeout=timeout,
448 return_results=return_results)
449
450
451 def parallel_on_machines(self, function, machines, timeout=None):
452 """
showardcd5fac42009-07-06 20:19:43 +0000453 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000454 @param machines: A list of machines to call function(machine) on.
455 @param timeout: Seconds after which the function call should timeout.
456
457 @returns A list of machines on which function(machine) returned
458 without raising an exception.
459 """
showardcd5fac42009-07-06 20:19:43 +0000460 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000461 return_results=True)
462 success_machines = []
463 for result, machine in itertools.izip(results, machines):
464 if not isinstance(result, Exception):
465 success_machines.append(machine)
466 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000467
468
Paul Pendleburye4afc772011-04-26 11:20:15 -0700469 def distribute_across_machines(self, tests, machines,
470 continuous_parsing=False):
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700471 """Run each test in tests once using machines.
472
473 Instead of running each test on each machine like parallel_on_machines,
474 run each test once across all machines. Put another way, the total
475 number of tests run by parallel_on_machines is len(tests) *
476 len(machines). The number of tests run by distribute_across_machines is
477 len(tests).
478
479 Args:
480 tests: List of tests to run.
Paul Pendleburye4afc772011-04-26 11:20:15 -0700481 machines: List of machines to use.
482 continuous_parsing: Bool, if true parse job while running.
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700483 """
484 # The Queue is thread safe, but since a machine may have to search
485 # through the queue to find a valid test the lock provides exclusive
486 # queue access for more than just the get call.
Paul Pendleburye4afc772011-04-26 11:20:15 -0700487 test_queue = multiprocessing.JoinableQueue()
488 test_queue_lock = multiprocessing.Lock()
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700489
Paul Pendlebury1f6f3e72011-04-13 11:16:44 -0700490 machine_workers = [server_job_utils.machine_worker(self,
491 machine,
Paul Pendleburyf807c182011-04-05 11:24:34 -0700492 self.resultdir,
493 test_queue,
Paul Pendleburye4afc772011-04-26 11:20:15 -0700494 test_queue_lock,
495 continuous_parsing)
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700496 for machine in machines]
497
498 # To (potentially) speed up searching for valid tests create a list of
499 # unique attribute sets present in the machines for this job. If sets
500 # were hashable we could just use a dictionary for fast verification.
501 # This at least reduces the search space from the number of machines to
502 # the number of unique machines.
503 unique_machine_attributes = []
504 for mw in machine_workers:
505 if not mw.attribute_set in unique_machine_attributes:
506 unique_machine_attributes.append(mw.attribute_set)
507
508 # Only queue tests which are valid on at least one machine. Record
509 # skipped tests in the status.log file using record_skipped_test().
510 for test_entry in tests:
Paul Pendleburyf807c182011-04-05 11:24:34 -0700511 ti = server_job_utils.test_item(*test_entry)
Paul Pendleburyff1076d2011-03-31 14:45:32 -0700512 machine_found = False
513 for ma in unique_machine_attributes:
514 if ti.validate(ma):
515 test_queue.put(ti)
516 machine_found = True
517 break
518 if not machine_found:
519 self.record_skipped_test(ti)
520
521 # Run valid tests and wait for completion.
522 for worker in machine_workers:
523 worker.start()
524 test_queue.join()
525
526
527 def record_skipped_test(self, skipped_test, message=None):
528 """Insert a failure record into status.log for this test."""
529 msg = message
530 if msg is None:
531 msg = 'No valid machines found for test %s.' % skipped_test
532 logging.info(msg)
533 self.record('START', None, skipped_test.test_name)
534 self.record('INFO', None, skipped_test.test_name, msg)
535 self.record('END TEST_NA', None, skipped_test.test_name, msg)
536
537
mbligh0d0f67d2009-11-06 03:15:03 +0000538 _USE_TEMP_DIR = object()
mbligh2b92b862008-11-22 13:25:32 +0000539 def run(self, cleanup=False, install_before=False, install_after=False,
jadmanskie432dd22009-01-30 15:04:51 +0000540 collect_crashdumps=True, namespace={}, control=None,
jadmanskidef0c3c2009-03-25 20:07:10 +0000541 control_file_dir=None, only_collect_crashinfo=False):
jadmanskifb9c0fa2009-04-29 17:39:16 +0000542 # for a normal job, make sure the uncollected logs file exists
543 # for a crashinfo-only run it should already exist, bail out otherwise
jadmanski648c39f2010-03-19 17:38:01 +0000544 created_uncollected_logs = False
mbligh0d0f67d2009-11-06 03:15:03 +0000545 if self.resultdir and not os.path.exists(self._uncollected_log_file):
jadmanskifb9c0fa2009-04-29 17:39:16 +0000546 if only_collect_crashinfo:
547 # if this is a crashinfo-only run, and there were no existing
548 # uncollected logs, just bail out early
549 logging.info("No existing uncollected logs, "
550 "skipping crashinfo collection")
551 return
552 else:
mbligh0d0f67d2009-11-06 03:15:03 +0000553 log_file = open(self._uncollected_log_file, "w")
jadmanskifb9c0fa2009-04-29 17:39:16 +0000554 pickle.dump([], log_file)
555 log_file.close()
jadmanski648c39f2010-03-19 17:38:01 +0000556 created_uncollected_logs = True
jadmanskifb9c0fa2009-04-29 17:39:16 +0000557
jadmanski10646442008-08-13 14:05:21 +0000558 # use a copy so changes don't affect the original dictionary
559 namespace = namespace.copy()
560 machines = self.machines
jadmanskie432dd22009-01-30 15:04:51 +0000561 if control is None:
jadmanski02a3ba22009-11-13 20:47:27 +0000562 if self.control is None:
563 control = ''
564 else:
565 control = self._load_control_file(self.control)
jadmanskie432dd22009-01-30 15:04:51 +0000566 if control_file_dir is None:
567 control_file_dir = self.resultdir
jadmanski10646442008-08-13 14:05:21 +0000568
569 self.aborted = False
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -0700570 namespace['gtest_runner'] = gtest_runner.gtest_runner()
jadmanski10646442008-08-13 14:05:21 +0000571 namespace['machines'] = machines
jadmanski808f4b12010-04-09 22:30:31 +0000572 namespace['args'] = self.args
jadmanski10646442008-08-13 14:05:21 +0000573 namespace['job'] = self
mbligh0d0f67d2009-11-06 03:15:03 +0000574 namespace['ssh_user'] = self._ssh_user
575 namespace['ssh_port'] = self._ssh_port
576 namespace['ssh_pass'] = self._ssh_pass
jadmanski10646442008-08-13 14:05:21 +0000577 test_start_time = int(time.time())
578
mbligh80e1eba2008-11-19 00:26:18 +0000579 if self.resultdir:
580 os.chdir(self.resultdir)
jadmanski779bd292009-03-19 17:33:33 +0000581 # touch status.log so that the parser knows a job is running here
jadmanski382303a2009-04-21 19:53:39 +0000582 open(self.get_status_log_path(), 'a').close()
mbligh80e1eba2008-11-19 00:26:18 +0000583 self.enable_external_logging()
jadmanskie432dd22009-01-30 15:04:51 +0000584
jadmanskicdd0c402008-09-19 21:21:31 +0000585 collect_crashinfo = True
mblighaebe3b62008-12-22 14:45:40 +0000586 temp_control_file_dir = None
jadmanski10646442008-08-13 14:05:21 +0000587 try:
showardcf8d4922009-10-14 16:08:39 +0000588 try:
589 if install_before and machines:
590 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanskie432dd22009-01-30 15:04:51 +0000591
showardcf8d4922009-10-14 16:08:39 +0000592 if only_collect_crashinfo:
593 return
594
jadmanskidef0c3c2009-03-25 20:07:10 +0000595 # determine the dir to write the control files to
596 cfd_specified = (control_file_dir
mbligh0d0f67d2009-11-06 03:15:03 +0000597 and control_file_dir is not self._USE_TEMP_DIR)
jadmanskidef0c3c2009-03-25 20:07:10 +0000598 if cfd_specified:
599 temp_control_file_dir = None
600 else:
601 temp_control_file_dir = tempfile.mkdtemp(
602 suffix='temp_control_file_dir')
603 control_file_dir = temp_control_file_dir
604 server_control_file = os.path.join(control_file_dir,
mblighe0cbc912010-03-11 18:03:07 +0000605 self._control_filename)
jadmanskidef0c3c2009-03-25 20:07:10 +0000606 client_control_file = os.path.join(control_file_dir,
607 CLIENT_CONTROL_FILENAME)
mbligh0d0f67d2009-11-06 03:15:03 +0000608 if self._client:
jadmanskidef0c3c2009-03-25 20:07:10 +0000609 namespace['control'] = control
610 utils.open_write_close(client_control_file, control)
mblighfeac0102009-04-28 18:31:12 +0000611 shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
612 server_control_file)
jadmanskidef0c3c2009-03-25 20:07:10 +0000613 else:
614 utils.open_write_close(server_control_file, control)
mbligh26f0d882009-06-22 18:30:01 +0000615 logging.info("Processing control file")
jadmanskidef0c3c2009-03-25 20:07:10 +0000616 self._execute_code(server_control_file, namespace)
mbligh26f0d882009-06-22 18:30:01 +0000617 logging.info("Finished processing control file")
jadmanski10646442008-08-13 14:05:21 +0000618
jadmanskidef0c3c2009-03-25 20:07:10 +0000619 # no error occured, so we don't need to collect crashinfo
620 collect_crashinfo = False
Eric Li6f27d4f2010-09-29 10:55:17 -0700621 except Exception, e:
showardcf8d4922009-10-14 16:08:39 +0000622 try:
623 logging.exception(
624 'Exception escaped control file, job aborting:')
Eric Li6f27d4f2010-09-29 10:55:17 -0700625 self.record('INFO', None, None, str(e),
626 {'job_abort_reason': str(e)})
showardcf8d4922009-10-14 16:08:39 +0000627 except:
628 pass # don't let logging exceptions here interfere
629 raise
jadmanski10646442008-08-13 14:05:21 +0000630 finally:
mblighaebe3b62008-12-22 14:45:40 +0000631 if temp_control_file_dir:
jadmanskie432dd22009-01-30 15:04:51 +0000632 # Clean up temp directory used for copies of the control files
mblighaebe3b62008-12-22 14:45:40 +0000633 try:
634 shutil.rmtree(temp_control_file_dir)
635 except Exception, e:
mblighe7d9c602009-07-02 19:02:33 +0000636 logging.warn('Could not remove temp directory %s: %s',
637 temp_control_file_dir, e)
jadmanskie432dd22009-01-30 15:04:51 +0000638
jadmanskicdd0c402008-09-19 21:21:31 +0000639 if machines and (collect_crashdumps or collect_crashinfo):
jadmanski10646442008-08-13 14:05:21 +0000640 namespace['test_start_time'] = test_start_time
jadmanskicdd0c402008-09-19 21:21:31 +0000641 if collect_crashinfo:
mbligh084bc172008-10-18 14:02:45 +0000642 # includes crashdumps
643 self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
jadmanskicdd0c402008-09-19 21:21:31 +0000644 else:
mbligh084bc172008-10-18 14:02:45 +0000645 self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
jadmanski648c39f2010-03-19 17:38:01 +0000646 if self._uncollected_log_file and created_uncollected_logs:
mbligh0d0f67d2009-11-06 03:15:03 +0000647 os.remove(self._uncollected_log_file)
jadmanski10646442008-08-13 14:05:21 +0000648 self.disable_external_logging()
showard45ae8192008-11-05 19:32:53 +0000649 if cleanup and machines:
650 self._execute_code(CLEANUP_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000651 if install_after and machines:
mbligh084bc172008-10-18 14:02:45 +0000652 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000653
654
655 def run_test(self, url, *args, **dargs):
mbligh2b92b862008-11-22 13:25:32 +0000656 """
657 Summon a test object and run it.
jadmanski10646442008-08-13 14:05:21 +0000658
659 tag
660 tag to add to testname
661 url
662 url of the test to run
663 """
mblighfc3da5b2010-01-06 18:37:22 +0000664 group, testname = self.pkgmgr.get_package_name(url, 'test')
665 testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
666 outputdir = self._make_test_outputdir(subdir)
jadmanski10646442008-08-13 14:05:21 +0000667
668 def group_func():
669 try:
670 test.runtest(self, url, tag, args, dargs)
671 except error.TestBaseException, e:
672 self.record(e.exit_status, subdir, testname, str(e))
673 raise
674 except Exception, e:
675 info = str(e) + "\n" + traceback.format_exc()
676 self.record('FAIL', subdir, testname, info)
677 raise
678 else:
mbligh2b92b862008-11-22 13:25:32 +0000679 self.record('GOOD', subdir, testname, 'completed successfully')
jadmanskide292df2008-08-26 20:51:14 +0000680
681 result, exc_info = self._run_group(testname, subdir, group_func)
682 if exc_info and isinstance(exc_info[1], error.TestBaseException):
683 return False
684 elif exc_info:
685 raise exc_info[0], exc_info[1], exc_info[2]
686 else:
687 return True
jadmanski10646442008-08-13 14:05:21 +0000688
689
690 def _run_group(self, name, subdir, function, *args, **dargs):
691 """\
692 Underlying method for running something inside of a group.
693 """
jadmanskide292df2008-08-26 20:51:14 +0000694 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000695 try:
696 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000697 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000698 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000699 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000700 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000701 except Exception, e:
702 err_msg = str(e) + '\n'
703 err_msg += traceback.format_exc()
704 self.record('END ABORT', subdir, name, err_msg)
705 raise error.JobError(name + ' failed\n' + traceback.format_exc())
706 else:
707 self.record('END GOOD', subdir, name)
708
jadmanskide292df2008-08-26 20:51:14 +0000709 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000710
711
712 def run_group(self, function, *args, **dargs):
713 """\
714 function:
715 subroutine to run
716 *args:
717 arguments for the function
718 """
719
720 name = function.__name__
721
722 # Allow the tag for the group to be specified.
723 tag = dargs.pop('tag', None)
724 if tag:
725 name = tag
726
jadmanskide292df2008-08-26 20:51:14 +0000727 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000728
729
730 def run_reboot(self, reboot_func, get_kernel_func):
731 """\
732 A specialization of run_group meant specifically for handling
733 a reboot. Includes support for capturing the kernel version
734 after the reboot.
735
736 reboot_func: a function that carries out the reboot
737
738 get_kernel_func: a function that returns a string
739 representing the kernel version.
740 """
jadmanski10646442008-08-13 14:05:21 +0000741 try:
742 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000743 reboot_func()
744 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000745 err_msg = str(e) + '\n' + traceback.format_exc()
746 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000747 raise
jadmanski10646442008-08-13 14:05:21 +0000748 else:
749 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000750 self.record('END GOOD', None, 'reboot',
Paul Pendlebury92a90742011-05-25 11:54:34 -0700751 optional_fields={"kernel": kernel},
752 status='completed successfully')
jadmanski10646442008-08-13 14:05:21 +0000753
754
jadmanskie432dd22009-01-30 15:04:51 +0000755 def run_control(self, path):
756 """Execute a control file found at path (relative to the autotest
757 path). Intended for executing a control file within a control file,
758 not for running the top-level job control file."""
759 path = os.path.join(self.autodir, path)
760 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000761 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000762
763
jadmanskic09fc152008-10-15 17:56:59 +0000764 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000765 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000766 on_every_test)
767
768
769 def add_sysinfo_logfile(self, file, on_every_test=False):
770 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
771
772
773 def _add_sysinfo_loggable(self, loggable, on_every_test):
774 if on_every_test:
775 self.sysinfo.test_loggables.add(loggable)
776 else:
777 self.sysinfo.boot_loggables.add(loggable)
778
779
jadmanski10646442008-08-13 14:05:21 +0000780 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000781 """Poll all the warning loggers and extract any new warnings that have
782 been logged. If the warnings belong to a category that is currently
783 disabled, this method will discard them and they will no longer be
784 retrievable.
785
786 Returns a list of (timestamp, message) tuples, where timestamp is an
787 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000788 warnings = []
789 while True:
790 # pull in a line of output from every logger that has
791 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000792 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000793 closed_loggers = set()
794 for logger in loggers:
795 line = logger.readline()
796 # record any broken pipes (aka line == empty)
797 if len(line) == 0:
798 closed_loggers.add(logger)
799 continue
jadmanskif37df842009-02-11 00:03:26 +0000800 # parse out the warning
801 timestamp, msgtype, msg = line.split('\t', 2)
802 timestamp = int(timestamp)
803 # if the warning is valid, add it to the results
804 if self.warning_manager.is_valid(timestamp, msgtype):
805 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000806
807 # stop listening to loggers that are closed
808 self.warning_loggers -= closed_loggers
809
810 # stop if none of the loggers have any output left
811 if not loggers:
812 break
813
814 # sort into timestamp order
815 warnings.sort()
816 return warnings
817
818
showardcc929362010-01-25 21:20:41 +0000819 def _unique_subdirectory(self, base_subdirectory_name):
820 """Compute a unique results subdirectory based on the given name.
821
822 Appends base_subdirectory_name with a number as necessary to find a
823 directory name that doesn't already exist.
824 """
825 subdirectory = base_subdirectory_name
826 counter = 1
827 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
828 subdirectory = base_subdirectory_name + '.' + str(counter)
829 counter += 1
830 return subdirectory
831
832
jadmanski52053632010-06-11 21:08:10 +0000833 def get_record_context(self):
834 """Returns an object representing the current job.record context.
835
836 The object returned is an opaque object with a 0-arg restore method
837 which can be called to restore the job.record context (i.e. indentation)
838 to the current level. The intention is that it should be used when
839 something external which generate job.record calls (e.g. an autotest
840 client) can fail catastrophically and the server job record state
841 needs to be reset to its original "known good" state.
842
843 @return: A context object with a 0-arg restore() method."""
844 return self._indenter.get_context()
845
846
showardcc929362010-01-25 21:20:41 +0000847 def record_summary(self, status_code, test_name, reason='', attributes=None,
848 distinguishing_attributes=(), child_test_ids=None):
849 """Record a summary test result.
850
851 @param status_code: status code string, see
852 common_lib.log.is_valid_status()
853 @param test_name: name of the test
854 @param reason: (optional) string providing detailed reason for test
855 outcome
856 @param attributes: (optional) dict of string keyvals to associate with
857 this result
858 @param distinguishing_attributes: (optional) list of attribute names
859 that should be used to distinguish identically-named test
860 results. These attributes should be present in the attributes
861 parameter. This is used to generate user-friendly subdirectory
862 names.
863 @param child_test_ids: (optional) list of test indices for test results
864 used in generating this result.
865 """
866 subdirectory_name_parts = [test_name]
867 for attribute in distinguishing_attributes:
868 assert attributes
869 assert attribute in attributes, '%s not in %s' % (attribute,
870 attributes)
871 subdirectory_name_parts.append(attributes[attribute])
872 base_subdirectory_name = '.'.join(subdirectory_name_parts)
873
874 subdirectory = self._unique_subdirectory(base_subdirectory_name)
875 subdirectory_path = os.path.join(self.resultdir, subdirectory)
876 os.mkdir(subdirectory_path)
877
878 self.record(status_code, subdirectory, test_name,
879 status=reason, optional_fields={'is_summary': True})
880
881 if attributes:
882 utils.write_keyval(subdirectory_path, attributes)
883
884 if child_test_ids:
885 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
886 summary_data = {'child_test_ids': ids_string}
887 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
888 summary_data)
889
890
jadmanski16a7ff72009-04-01 18:19:53 +0000891 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000892 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000893 self.record("INFO", None, None,
894 "disabling %s warnings" % warning_type,
895 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000896
897
jadmanski16a7ff72009-04-01 18:19:53 +0000898 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000899 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000900 self.record("INFO", None, None,
901 "enabling %s warnings" % warning_type,
902 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000903
904
jadmanski779bd292009-03-19 17:33:33 +0000905 def get_status_log_path(self, subdir=None):
906 """Return the path to the job status log.
907
908 @param subdir - Optional paramter indicating that you want the path
909 to a subdirectory status log.
910
911 @returns The path where the status log should be.
912 """
mbligh210bae62009-04-01 18:33:13 +0000913 if self.resultdir:
914 if subdir:
915 return os.path.join(self.resultdir, subdir, "status.log")
916 else:
917 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000918 else:
mbligh210bae62009-04-01 18:33:13 +0000919 return None
jadmanski779bd292009-03-19 17:33:33 +0000920
921
jadmanski6bb32d72009-03-19 20:25:24 +0000922 def _update_uncollected_logs_list(self, update_func):
923 """Updates the uncollected logs list in a multi-process safe manner.
924
925 @param update_func - a function that updates the list of uncollected
926 logs. Should take one parameter, the list to be updated.
927 """
mbligh0d0f67d2009-11-06 03:15:03 +0000928 if self._uncollected_log_file:
929 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000930 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000931 try:
932 uncollected_logs = pickle.load(log_file)
933 update_func(uncollected_logs)
934 log_file.seek(0)
935 log_file.truncate()
936 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000937 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000938 finally:
939 fcntl.flock(log_file, fcntl.LOCK_UN)
940 log_file.close()
941
942
943 def add_client_log(self, hostname, remote_path, local_path):
944 """Adds a new set of client logs to the list of uncollected logs,
945 to allow for future log recovery.
946
947 @param host - the hostname of the machine holding the logs
948 @param remote_path - the directory on the remote machine holding logs
949 @param local_path - the local directory to copy the logs into
950 """
951 def update_func(logs_list):
952 logs_list.append((hostname, remote_path, local_path))
953 self._update_uncollected_logs_list(update_func)
954
955
956 def remove_client_log(self, hostname, remote_path, local_path):
957 """Removes a set of client logs from the list of uncollected logs,
958 to allow for future log recovery.
959
960 @param host - the hostname of the machine holding the logs
961 @param remote_path - the directory on the remote machine holding logs
962 @param local_path - the local directory to copy the logs into
963 """
964 def update_func(logs_list):
965 logs_list.remove((hostname, remote_path, local_path))
966 self._update_uncollected_logs_list(update_func)
967
968
mbligh0d0f67d2009-11-06 03:15:03 +0000969 def get_client_logs(self):
970 """Retrieves the list of uncollected logs, if it exists.
971
972 @returns A list of (host, remote_path, local_path) tuples. Returns
973 an empty list if no uncollected logs file exists.
974 """
975 log_exists = (self._uncollected_log_file and
976 os.path.exists(self._uncollected_log_file))
977 if log_exists:
978 return pickle.load(open(self._uncollected_log_file))
979 else:
980 return []
981
982
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
            namespace: The namespace dictionary to fill in.
            protect: Boolean. If True (the default) any operation that would
                clobber an existing entry in namespace will cause an error.
        Raises:
            error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name. If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied? Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            # __import__ returns the top-level package, so walk down the
            # dotted path to reach the leaf module before enumerating names.
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                      ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                       'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job. Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # Seed the host factory with this job's ssh connection settings.
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001068
1069
1070 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001071 """
1072 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001073
1074 Unless protect_namespace is explicitly set to False, the dict will not
1075 be modified.
1076
1077 Args:
1078 code_file: The filename of the control file to execute.
1079 namespace: A dict containing names to make available during execution.
1080 protect: Boolean. If True (the default) a copy of the namespace dict
1081 is used during execution to prevent the code from modifying its
1082 contents outside of this function. If False the raw dict is
1083 passed in and modifications will be allowed.
1084 """
1085 if protect:
1086 namespace = namespace.copy()
1087 self._fill_server_control_namespace(namespace, protect=protect)
1088 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001089 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001090 machines_text = '\n'.join(self.machines) + '\n'
1091 # Only rewrite the file if it does not match our machine list.
1092 try:
1093 machines_f = open(MACHINES_FILENAME, 'r')
1094 existing_machines_text = machines_f.read()
1095 machines_f.close()
1096 except EnvironmentError:
1097 existing_machines_text = None
1098 if machines_text != existing_machines_text:
1099 utils.open_write_close(MACHINES_FILENAME, machines_text)
1100 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001101
1102
jadmanskie29d0e42010-06-17 16:06:52 +00001103 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001104 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001105 return
jadmanskie29d0e42010-06-17 16:06:52 +00001106 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001107 for test in new_tests:
1108 self.__insert_test(test)
1109
1110
1111 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001112 """
1113 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001114 database. This method will not raise an exception, even if an
1115 error occurs during the insert, to avoid failing a test
1116 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001117 self.num_tests_run += 1
1118 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1119 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001120 try:
1121 self.results_db.insert_test(self.job_model, test)
1122 except Exception:
1123 msg = ("WARNING: An unexpected error occured while "
1124 "inserting test results into the database. "
1125 "Ignoring error.\n" + traceback.format_exc())
1126 print >> sys.stderr, msg
1127
mblighcaa62c22008-04-07 21:51:17 +00001128
mblighfc3da5b2010-01-06 18:37:22 +00001129 def preprocess_client_state(self):
1130 """
1131 Produce a state file for initializing the state of a client job.
1132
1133 Creates a new client state file with all the current server state, as
1134 well as some pre-set client state.
1135
1136 @returns The path of the file the state was written into.
1137 """
1138 # initialize the sysinfo state
1139 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1140
1141 # dump the state out to a tempfile
1142 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1143 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001144
1145 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001146 self._state.write_to_file(file_path)
1147 return file_path
1148
1149
1150 def postprocess_client_state(self, state_path):
1151 """
1152 Update the state of this job with the state from a client job.
1153
1154 Updates the state of the server side of a job with the final state
1155 of a client job that was run. Updates the non-client-specific state,
1156 pulls in some specific bits from the client-specific state, and then
1157 discards the rest. Removes the state file afterwards
1158
1159 @param state_file A path to the state file from the client.
1160 """
1161 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001162 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001163 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001164 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001165 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001166 # ignore file-not-found errors
1167 if e.errno != errno.ENOENT:
1168 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001169 else:
1170 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001171
1172 # update the sysinfo state
1173 if self._state.has('client', 'sysinfo'):
1174 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1175
1176 # drop all the client-specific state
1177 self._state.discard_namespace('client')
1178
1179
mbligh0a883702010-04-21 01:58:34 +00001180 def clear_all_known_hosts(self):
1181 """Clears known hosts files for all AbstractSSHHosts."""
1182 for host in self.hosts:
1183 if isinstance(host, abstract_ssh.AbstractSSHHost):
1184 host.clear_known_hosts()
1185
1186
# Pick up the site-specific server_job extension when one is available;
# falls back to base_server_job otherwise.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +00001190
class server_job(site_server_job):
    # The public server_job class: base_server_job plus any site-specific
    # behavior mixed in via site_server_job.
    pass
jadmanskif37df842009-02-11 00:03:26 +00001193
1194
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # Maps warning type -> list of (start, end) disabled intervals,
        # where end is None for an interval that is still open.
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for begin, finish in self.disabled_warnings.get(warning_type, []):
            disabled = timestamp >= begin and (finish is None or
                                               timestamp < finish)
            if disabled:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        # Only open a new interval when the last one has been closed.
        if not intervals or intervals[-1][1] is not None:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # Close the currently open interval at the present time.
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))