blob: 8bcf130baf8a27afa16c8d6d039c3a4c113904e6 [file] [log] [blame]
Dan Shi07e09af2013-04-12 09:31:29 -07001# pylint: disable-msg=C0111
2
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07003# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00006"""
7The main job wrapper for the server side.
8
9This is the core infrastructure. Derived from the client side job.py
10
11Copyright Martin J. Bligh, Andy Whitcroft 2007
12"""
13
Scott Zawalski91493c82013-01-25 16:15:20 -050014import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000015import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000016from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000017from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050018from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000019from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070020from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Well-known filenames written into a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-canned control segment files shipped alongside this module; each one
# implements a single phase of a server job (install, verify, repair, ...)
# and is executed via job._execute_code (see _control_segment_path above).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
RESET_CONTROL_FILE = _control_segment_path('reset')
jadmanski10646442008-08-13 14:05:21 +000045
46
mbligh062ed152009-01-13 00:57:14 +000047# by default provide a stub that generates no site data
48def _get_site_job_data_dummy(job):
49 return {}
50
51
class status_indenter(base_job.status_indenter):
    """A status indenter backed by a single integer counter."""
    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Raise the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Lower the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        class context(object):
            def __init__(self, owner, level):
                self._owner = owner
                self._level = level
            def restore(self):
                # Roll the indenter back to the level captured at creation.
                self._owner._indent = self._level
        return context(self, self._indent)
80
81
class server_job_record_hook(object):
    """The job.record hook for server job.

    Injects WARN messages gathered from the console/vlm warning loggers
    whenever a new status log entry is written, and echoes every rendered
    entry to INFO-level logging. Implemented as a class so it can keep a
    re-entrancy flag: the hook itself calls job.record to emit the WARN
    lines, which would otherwise recurse forever.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """Invoke the real hook (_hook) unless we are already inside it.

        Makes no attempt to be threadsafe; it only guards against direct
        infinite recursion via job.record -> _hook -> job.record -> ...
        """
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        pending = []
        # Poll all of the job's warning loggers for new warnings, recording
        # each one as a WARN status entry.
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        # Echo rendered versions of all the status logs (the new warnings
        # plus the triggering entry) to INFO and feed the status parser.
        pending.append(entry)
        for log_entry in pending:
            rendered = job._logger.render_entry(log_entry)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000126
127
class base_server_job(base_job.base_job):
    """The server-side concrete implementation of base_job.

    Optional properties provided by this implementation:
        serverdir
        conmuxdir

        num_tests_run
        num_tests_failed

        warning_manager
        warning_loggers
    """

    # Version of the status log format written by this job; handed to the
    # TKO status parser (status_lib.parser) and recorded in job keyvals.
    _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000143
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
                 group_name='', tag='', disable_sysinfo=False,
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username. [root]
        @param ssh_port: The SSH port number. [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param test_retry: The number of times to retry a test if the test did
                not complete successfully.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler. [optional]
        @param disable_sysinfo: Whether we should disable the sysinfo step of
                tests for a modest shortening of test time. [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir,
                                              test_retry=test_retry)
        # NOTE(review): 'path' is computed here but never used below.
        path = os.path.dirname(__file__)
        self.test_retry = test_retry
        self.control = control
        # Tracks logs that still need to be collected from the machines.
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.label = label
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename
        self._disable_sysinfo = disable_sysinfo

        # Take over stdout/stderr so all output is captured in the job logs.
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # Continuous TKO parsing only works for single-machine jobs.
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger; the record hook echoes entries to INFO
        # and injects any pending warnings (see server_job_record_hook).
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
244
mbligh0d0f67d2009-11-06 03:15:03 +0000245
246 @classmethod
247 def _find_base_directories(cls):
248 """
249 Determine locations of autodir, clientdir and serverdir. Assumes
250 that this file is located within serverdir and uses __file__ along
251 with relative paths to resolve the location.
252 """
253 serverdir = os.path.abspath(os.path.dirname(__file__))
254 autodir = os.path.normpath(os.path.join(serverdir, '..'))
255 clientdir = os.path.join(autodir, 'client')
256 return autodir, clientdir, serverdir
257
258
Scott Zawalski91493c82013-01-25 16:15:20 -0500259 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000260 """
261 Determine the location of resultdir. For server jobs we expect one to
262 always be explicitly passed in to __init__, so just return that.
263 """
264 if resultdir:
265 return os.path.normpath(resultdir)
266 else:
267 return None
268
jadmanski550fdc22008-11-20 16:32:08 +0000269
jadmanski2a89dac2010-06-11 14:32:58 +0000270 def _get_status_logger(self):
271 """Return a reference to the status logger."""
272 return self._logger
273
274
jadmanskie432dd22009-01-30 15:04:51 +0000275 @staticmethod
276 def _load_control_file(path):
277 f = open(path)
278 try:
279 control_file = f.read()
280 finally:
281 f.close()
282 return re.sub('\r', '', control_file)
283
284
jadmanski550fdc22008-11-20 16:32:08 +0000285 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000286 """
287 Register some hooks into the subcommand modules that allow us
288 to properly clean up self.hosts created in forked subprocesses.
289 """
jadmanski550fdc22008-11-20 16:32:08 +0000290 def on_fork(cmd):
291 self._existing_hosts_on_fork = set(self.hosts)
292 def on_join(cmd):
293 new_hosts = self.hosts - self._existing_hosts_on_fork
294 for host in new_hosts:
295 host.close()
296 subcommand.subcommand.register_fork_hook(on_fork)
297 subcommand.subcommand.register_join_hook(on_join)
298
jadmanski10646442008-08-13 14:05:21 +0000299
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless self._using_parser is set (single-machine jobs with
        a parse_job tag — see __init__).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # NOTE(review): buffering=0 requests an unbuffered file, which is
        # Python 2 semantics for this 3-argument open() call.
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # Job already present: reuse its indices on the job model.
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
326
327
328 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000329 """
330 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000331 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000332 remaining test results to the results db)
333 """
mbligh0d0f67d2009-11-06 03:15:03 +0000334 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000335 return
336 final_tests = self.parser.end()
337 for test in final_tests:
338 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000339 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000340
341
342 def verify(self):
Dan Shi07e09af2013-04-12 09:31:29 -0700343 """Verify machines are all ssh-able."""
jadmanski10646442008-08-13 14:05:21 +0000344 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000345 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000346 if self.resultdir:
347 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000348 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000349 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000350 'ssh_user' : self._ssh_user,
351 'ssh_port' : self._ssh_port,
352 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000353 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000354 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000355 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000356 self.record('ABORT', None, None, msg)
357 raise
358
359
Dan Shi07e09af2013-04-12 09:31:29 -0700360 def reset(self):
361 """Reset machines by first cleanup then verify each machine."""
362 if not self.machines:
363 raise error.AutoservError('No machines specified to reset.')
364 if self.resultdir:
365 os.chdir(self.resultdir)
366
367 try:
368 namespace = {'machines' : self.machines, 'job' : self,
369 'ssh_user' : self._ssh_user,
370 'ssh_port' : self._ssh_port,
371 'ssh_pass' : self._ssh_pass}
372 self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
373 except Exception as e:
374 msg = ('Reset failed\n' + str(e) + '\n' +
375 traceback.format_exc())
376 self.record('ABORT', None, None, msg)
377 raise
378
379
jadmanski10646442008-08-13 14:05:21 +0000380 def repair(self, host_protection):
381 if not self.machines:
382 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000383 if self.resultdir:
384 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000385 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000386 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
387 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000388 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000389
mbligh0931b0a2009-04-08 17:44:48 +0000390 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000391
392
Alex Millercb79ba72013-05-29 14:43:00 -0700393 def provision(self, labels):
394 """
395 Provision all hosts to match |labels|.
396
397 @param labels: A comma seperated string of labels to provision the
398 host to.
399
400 """
401 namespace = {'provision_labels': labels}
402 control = self._load_control_file(PROVISION_CONTROL_FILE)
403 self.run(control=control, namespace=namespace)
404
405
jadmanski10646442008-08-13 14:05:21 +0000406 def precheck(self):
407 """
408 perform any additional checks in derived classes.
409 """
410 pass
411
412
413 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000414 """
415 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000416 """
417 pass
418
419
420 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000421 """
422 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000423 """
424 pass
425
426
427 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000428 """
429 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000430 """
431 return False
432
433
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        Three cases:
          - parsing job, forking, logging: wrapper re-points this job at a
            single machine and runs the TKO parser around the call;
          - multi-machine with logging: wrapper sets up a per-machine
            execution context and keyvals;
          - otherwise: function is returned unwrapped.
        """
        # Forking happens unless this is the single machine the job
        # already owns.
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # NOTE(review): mutates job state (parse tag, machines);
                # presumably safe because this runs in the subcommand's
                # forked child — confirm against subcommand.parallel_simple.
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        return wrapper
461
462
463 def parallel_simple(self, function, machines, log=True, timeout=None,
464 return_results=False):
465 """
466 Run 'function' using parallel_simple, with an extra wrapper to handle
467 the necessary setup for continuous parsing, if possible. If continuous
468 parsing is already properly initialized then this should just work.
469
470 @param function: A callable to run in parallel given each machine.
471 @param machines: A list of machine names to be passed one per subcommand
472 invocation of function.
473 @param log: If True, output will be written to output in a subdirectory
474 named after each machine.
475 @param timeout: Seconds after which the function call should timeout.
476 @param return_results: If True instead of an AutoServError being raised
477 on any error a list of the results|exceptions from the function
478 called on each arg is returned. [default: False]
479
480 @raises error.AutotestError: If any of the functions failed.
481 """
482 wrapper = self._make_parallel_wrapper(function, machines, log)
483 return subcommand.parallel_simple(wrapper, machines,
484 log=log, timeout=timeout,
485 return_results=return_results)
486
487
488 def parallel_on_machines(self, function, machines, timeout=None):
489 """
showardcd5fac42009-07-06 20:19:43 +0000490 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000491 @param machines: A list of machines to call function(machine) on.
492 @param timeout: Seconds after which the function call should timeout.
493
494 @returns A list of machines on which function(machine) returned
495 without raising an exception.
496 """
showardcd5fac42009-07-06 20:19:43 +0000497 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000498 return_results=True)
499 success_machines = []
500 for result, machine in itertools.izip(results, machines):
501 if not isinstance(result, Exception):
502 success_machines.append(machine)
503 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000504
505
    # Sentinel value for control_file_dir requesting a temporary directory.
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        """
        Execute this job's control file against its machines.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True and there are machines, run the
                install segment before the control file.
        @param install_after: If True and there are machines, run the
                install segment after everything else.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names for the control file namespace.
                NOTE(review): a mutable default dict; harmless only because
                it is copied below before being modified.
        @param control: Control file text; defaults to loading self.control.
        @param control_file_dir: Where to write control file copies, or
                _USE_TEMP_DIR/None for a temporary/results directory.
        @param verify_job_repo_url: If True, run the verify_job_repo_url
                segment before the control file.
        @param only_collect_crashinfo: If True, skip running the control
                file and only collect crash info from a previous run.
        @param skip_crash_collection: If True, skip crash dump/info
                collection entirely.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        logging.info("I am PID %s", os.getpid())
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                # Seed the uncollected-logs file with an empty pickled list.
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # Crashinfo (superset of crashdumps) is collected unless the control
        # file finishes without an exception escaping.
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # Client-side control: the server runs a wrapper that
                    # ships the real control file to the client.
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000635
636
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run; also used to derive the test
                name via the package manager.
        @param args: positional arguments passed through to test.runtest.
        @param dargs: keyword arguments passed through to test.runtest and
                consulted when building the tagged test name (e.g. 'tag').

        @return True if the test completed successfully; False if it raised
                an error.TestBaseException (already recorded in the status
                log). Any other exception is re-raised.
        """
        if self._disable_sysinfo:
            dargs['disable_sysinfo'] = True

        # NOTE(review): 'group' is unused; 'outputdir' appears to be
        # computed for _make_test_outputdir's side effect only.
        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            # Run the test and record its outcome in the status log;
            # exceptions propagate to _run_group after being recorded.
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # Python 2 three-argument raise: re-raise with the original
            # traceback preserved.
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000673
674
675 def _run_group(self, name, subdir, function, *args, **dargs):
676 """\
677 Underlying method for running something inside of a group.
678 """
jadmanskide292df2008-08-26 20:51:14 +0000679 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000680 try:
681 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000682 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000683 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000684 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000685 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000686 except Exception, e:
687 err_msg = str(e) + '\n'
688 err_msg += traceback.format_exc()
689 self.record('END ABORT', subdir, name, err_msg)
690 raise error.JobError(name + ' failed\n' + traceback.format_exc())
691 else:
692 self.record('END GOOD', subdir, name)
693
jadmanskide292df2008-08-26 20:51:14 +0000694 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000695
696
697 def run_group(self, function, *args, **dargs):
698 """\
699 function:
700 subroutine to run
701 *args:
702 arguments for the function
703 """
704
705 name = function.__name__
706
707 # Allow the tag for the group to be specified.
708 tag = dargs.pop('tag', None)
709 if tag:
710 name = tag
711
jadmanskide292df2008-08-26 20:51:14 +0000712 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000713
714
715 def run_reboot(self, reboot_func, get_kernel_func):
716 """\
717 A specialization of run_group meant specifically for handling
718 a reboot. Includes support for capturing the kernel version
719 after the reboot.
720
721 reboot_func: a function that carries out the reboot
722
723 get_kernel_func: a function that returns a string
724 representing the kernel version.
725 """
jadmanski10646442008-08-13 14:05:21 +0000726 try:
727 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000728 reboot_func()
729 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000730 err_msg = str(e) + '\n' + traceback.format_exc()
731 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000732 raise
jadmanski10646442008-08-13 14:05:21 +0000733 else:
734 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000735 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700736 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000737
738
jadmanskie432dd22009-01-30 15:04:51 +0000739 def run_control(self, path):
740 """Execute a control file found at path (relative to the autotest
741 path). Intended for executing a control file within a control file,
742 not for running the top-level job control file."""
743 path = os.path.join(self.autodir, path)
744 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000745 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000746
747
jadmanskic09fc152008-10-15 17:56:59 +0000748 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000749 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000750 on_every_test)
751
752
753 def add_sysinfo_logfile(self, file, on_every_test=False):
754 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
755
756
757 def _add_sysinfo_loggable(self, loggable, on_every_test):
758 if on_every_test:
759 self.sysinfo.test_loggables.add(loggable)
760 else:
761 self.sysinfo.boot_loggables.add(loggable)
762
763
jadmanski10646442008-08-13 14:05:21 +0000764 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000765 """Poll all the warning loggers and extract any new warnings that have
766 been logged. If the warnings belong to a category that is currently
767 disabled, this method will discard them and they will no longer be
768 retrievable.
769
770 Returns a list of (timestamp, message) tuples, where timestamp is an
771 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000772 warnings = []
773 while True:
774 # pull in a line of output from every logger that has
775 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000776 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000777 closed_loggers = set()
778 for logger in loggers:
779 line = logger.readline()
780 # record any broken pipes (aka line == empty)
781 if len(line) == 0:
782 closed_loggers.add(logger)
783 continue
jadmanskif37df842009-02-11 00:03:26 +0000784 # parse out the warning
785 timestamp, msgtype, msg = line.split('\t', 2)
786 timestamp = int(timestamp)
787 # if the warning is valid, add it to the results
788 if self.warning_manager.is_valid(timestamp, msgtype):
789 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000790
791 # stop listening to loggers that are closed
792 self.warning_loggers -= closed_loggers
793
794 # stop if none of the loggers have any output left
795 if not loggers:
796 break
797
798 # sort into timestamp order
799 warnings.sort()
800 return warnings
801
802
showardcc929362010-01-25 21:20:41 +0000803 def _unique_subdirectory(self, base_subdirectory_name):
804 """Compute a unique results subdirectory based on the given name.
805
806 Appends base_subdirectory_name with a number as necessary to find a
807 directory name that doesn't already exist.
808 """
809 subdirectory = base_subdirectory_name
810 counter = 1
811 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
812 subdirectory = base_subdirectory_name + '.' + str(counter)
813 counter += 1
814 return subdirectory
815
816
jadmanski52053632010-06-11 21:08:10 +0000817 def get_record_context(self):
818 """Returns an object representing the current job.record context.
819
820 The object returned is an opaque object with a 0-arg restore method
821 which can be called to restore the job.record context (i.e. indentation)
822 to the current level. The intention is that it should be used when
823 something external which generate job.record calls (e.g. an autotest
824 client) can fail catastrophically and the server job record state
825 needs to be reset to its original "known good" state.
826
827 @return: A context object with a 0-arg restore() method."""
828 return self._indenter.get_context()
829
830
showardcc929362010-01-25 21:20:41 +0000831 def record_summary(self, status_code, test_name, reason='', attributes=None,
832 distinguishing_attributes=(), child_test_ids=None):
833 """Record a summary test result.
834
835 @param status_code: status code string, see
836 common_lib.log.is_valid_status()
837 @param test_name: name of the test
838 @param reason: (optional) string providing detailed reason for test
839 outcome
840 @param attributes: (optional) dict of string keyvals to associate with
841 this result
842 @param distinguishing_attributes: (optional) list of attribute names
843 that should be used to distinguish identically-named test
844 results. These attributes should be present in the attributes
845 parameter. This is used to generate user-friendly subdirectory
846 names.
847 @param child_test_ids: (optional) list of test indices for test results
848 used in generating this result.
849 """
850 subdirectory_name_parts = [test_name]
851 for attribute in distinguishing_attributes:
852 assert attributes
853 assert attribute in attributes, '%s not in %s' % (attribute,
854 attributes)
855 subdirectory_name_parts.append(attributes[attribute])
856 base_subdirectory_name = '.'.join(subdirectory_name_parts)
857
858 subdirectory = self._unique_subdirectory(base_subdirectory_name)
859 subdirectory_path = os.path.join(self.resultdir, subdirectory)
860 os.mkdir(subdirectory_path)
861
862 self.record(status_code, subdirectory, test_name,
863 status=reason, optional_fields={'is_summary': True})
864
865 if attributes:
866 utils.write_keyval(subdirectory_path, attributes)
867
868 if child_test_ids:
869 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
870 summary_data = {'child_test_ids': ids_string}
871 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
872 summary_data)
873
874
jadmanski16a7ff72009-04-01 18:19:53 +0000875 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000876 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000877 self.record("INFO", None, None,
878 "disabling %s warnings" % warning_type,
879 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000880
881
jadmanski16a7ff72009-04-01 18:19:53 +0000882 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000883 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000884 self.record("INFO", None, None,
885 "enabling %s warnings" % warning_type,
886 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000887
888
jadmanski779bd292009-03-19 17:33:33 +0000889 def get_status_log_path(self, subdir=None):
890 """Return the path to the job status log.
891
892 @param subdir - Optional paramter indicating that you want the path
893 to a subdirectory status log.
894
895 @returns The path where the status log should be.
896 """
mbligh210bae62009-04-01 18:33:13 +0000897 if self.resultdir:
898 if subdir:
899 return os.path.join(self.resultdir, subdir, "status.log")
900 else:
901 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000902 else:
mbligh210bae62009-04-01 18:33:13 +0000903 return None
jadmanski779bd292009-03-19 17:33:33 +0000904
905
jadmanski6bb32d72009-03-19 20:25:24 +0000906 def _update_uncollected_logs_list(self, update_func):
907 """Updates the uncollected logs list in a multi-process safe manner.
908
909 @param update_func - a function that updates the list of uncollected
910 logs. Should take one parameter, the list to be updated.
911 """
Dan Shi07e09af2013-04-12 09:31:29 -0700912 # Skip log collection if file _uncollected_log_file does not exist.
913 if not (self._uncollected_log_file and
914 os.path.exists(self._uncollected_log_file)):
915 return
mbligh0d0f67d2009-11-06 03:15:03 +0000916 if self._uncollected_log_file:
917 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000918 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000919 try:
920 uncollected_logs = pickle.load(log_file)
921 update_func(uncollected_logs)
922 log_file.seek(0)
923 log_file.truncate()
924 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000925 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000926 finally:
927 fcntl.flock(log_file, fcntl.LOCK_UN)
928 log_file.close()
929
930
931 def add_client_log(self, hostname, remote_path, local_path):
932 """Adds a new set of client logs to the list of uncollected logs,
933 to allow for future log recovery.
934
935 @param host - the hostname of the machine holding the logs
936 @param remote_path - the directory on the remote machine holding logs
937 @param local_path - the local directory to copy the logs into
938 """
939 def update_func(logs_list):
940 logs_list.append((hostname, remote_path, local_path))
941 self._update_uncollected_logs_list(update_func)
942
943
944 def remove_client_log(self, hostname, remote_path, local_path):
945 """Removes a set of client logs from the list of uncollected logs,
946 to allow for future log recovery.
947
948 @param host - the hostname of the machine holding the logs
949 @param remote_path - the directory on the remote machine holding logs
950 @param local_path - the local directory to copy the logs into
951 """
952 def update_func(logs_list):
953 logs_list.remove((hostname, remote_path, local_path))
954 self._update_uncollected_logs_list(update_func)
955
956
mbligh0d0f67d2009-11-06 03:15:03 +0000957 def get_client_logs(self):
958 """Retrieves the list of uncollected logs, if it exists.
959
960 @returns A list of (host, remote_path, local_path) tuples. Returns
961 an empty list if no uncollected logs file exists.
962 """
963 log_exists = (self._uncollected_log_file and
964 os.path.exists(self._uncollected_log_file))
965 if log_exists:
966 return pickle.load(open(self._uncollected_log_file))
967 else:
968 return []
969
970
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            # __import__ returns the top-level package, so walk down the
            # dotted path to reach the leaf module before enumerating names.
            if not names:
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        # A different object already owns this name: refuse.
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                # Reached on no conflict or a benign duplicate of the same
                # object; either way the (re)binding is safe.
                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing to be part of a public
        # API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001056
1057
1058 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +00001059 """
1060 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +00001061
1062 Unless protect_namespace is explicitly set to False, the dict will not
1063 be modified.
1064
1065 Args:
1066 code_file: The filename of the control file to execute.
1067 namespace: A dict containing names to make available during execution.
1068 protect: Boolean. If True (the default) a copy of the namespace dict
1069 is used during execution to prevent the code from modifying its
1070 contents outside of this function. If False the raw dict is
1071 passed in and modifications will be allowed.
1072 """
1073 if protect:
1074 namespace = namespace.copy()
1075 self._fill_server_control_namespace(namespace, protect=protect)
1076 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +00001077 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +00001078 machines_text = '\n'.join(self.machines) + '\n'
1079 # Only rewrite the file if it does not match our machine list.
1080 try:
1081 machines_f = open(MACHINES_FILENAME, 'r')
1082 existing_machines_text = machines_f.read()
1083 machines_f.close()
1084 except EnvironmentError:
1085 existing_machines_text = None
1086 if machines_text != existing_machines_text:
1087 utils.open_write_close(MACHINES_FILENAME, machines_text)
1088 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001089
1090
jadmanskie29d0e42010-06-17 16:06:52 +00001091 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001092 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001093 return
jadmanskie29d0e42010-06-17 16:06:52 +00001094 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001095 for test in new_tests:
1096 self.__insert_test(test)
1097
1098
1099 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +00001100 """
1101 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +00001102 database. This method will not raise an exception, even if an
1103 error occurs during the insert, to avoid failing a test
1104 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +00001105 self.num_tests_run += 1
1106 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
1107 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +00001108 try:
1109 self.results_db.insert_test(self.job_model, test)
1110 except Exception:
1111 msg = ("WARNING: An unexpected error occured while "
1112 "inserting test results into the database. "
1113 "Ignoring error.\n" + traceback.format_exc())
1114 print >> sys.stderr, msg
1115
mblighcaa62c22008-04-07 21:51:17 +00001116
mblighfc3da5b2010-01-06 18:37:22 +00001117 def preprocess_client_state(self):
1118 """
1119 Produce a state file for initializing the state of a client job.
1120
1121 Creates a new client state file with all the current server state, as
1122 well as some pre-set client state.
1123
1124 @returns The path of the file the state was written into.
1125 """
1126 # initialize the sysinfo state
1127 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1128
1129 # dump the state out to a tempfile
1130 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1131 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001132
1133 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001134 self._state.write_to_file(file_path)
1135 return file_path
1136
1137
1138 def postprocess_client_state(self, state_path):
1139 """
1140 Update the state of this job with the state from a client job.
1141
1142 Updates the state of the server side of a job with the final state
1143 of a client job that was run. Updates the non-client-specific state,
1144 pulls in some specific bits from the client-specific state, and then
1145 discards the rest. Removes the state file afterwards
1146
1147 @param state_file A path to the state file from the client.
1148 """
1149 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001150 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001151 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001152 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001153 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001154 # ignore file-not-found errors
1155 if e.errno != errno.ENOENT:
1156 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001157 else:
1158 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001159
1160 # update the sysinfo state
1161 if self._state.has('client', 'sysinfo'):
1162 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1163
1164 # drop all the client-specific state
1165 self._state.discard_namespace('client')
1166
1167
mbligh0a883702010-04-21 01:58:34 +00001168 def clear_all_known_hosts(self):
1169 """Clears known hosts files for all AbstractSSHHosts."""
1170 for host in self.hosts:
1171 if isinstance(host, abstract_ssh.AbstractSSHHost):
1172 host.clear_known_hosts()
1173
1174
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # maps warning type -> list of (start, end) disabled intervals;
        # an end of None means "disabled from start onwards"
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for start, end in self.disabled_warnings.get(warning_type, []):
            ended = end is not None and timestamp >= end
            if timestamp >= start and not ended:
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_open = intervals and intervals[-1][1] is None
        if not currently_open:
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            start = intervals[-1][0]
            intervals[-1] = (start, int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001206
1207
# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
    "autotest_lib.server.site_server_job", "get_site_job_data",
    _get_site_job_data_dummy)


# Pick up a site-specific server_job subclass when one is provided,
# falling back to base_server_job otherwise.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)


class server_job(site_server_job):
    # Concrete job class used by the rest of the server code; all behavior
    # comes from base_server_job plus any site customizations.
    pass