blob: 29fac23b6351e07590f470b2559d446427481dd2 [file] [log] [blame]
Dan Shi07e09af2013-04-12 09:31:29 -07001# pylint: disable-msg=C0111
2
Paul Pendlebury7c1fdcf2011-05-04 12:39:15 -07003# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
mbligh57e78662008-06-17 19:53:49 +00006"""
7The main job wrapper for the server side.
8
9This is the core infrastructure. Derived from the client side job.py
10
11Copyright Martin J. Bligh, Andy Whitcroft 2007
12"""
13
Scott Zawalski91493c82013-01-25 16:15:20 -050014import getpass, os, sys, re, tempfile, time, select, platform
mblighfc3da5b2010-01-06 18:37:22 +000015import traceback, shutil, warnings, fcntl, pickle, logging, itertools, errno
showard75cdfee2009-06-10 17:40:41 +000016from autotest_lib.client.bin import sysinfo
mbligh0d0f67d2009-11-06 03:15:03 +000017from autotest_lib.client.common_lib import base_job
Scott Zawalski91493c82013-01-25 16:15:20 -050018from autotest_lib.client.common_lib import error, utils, packages
showard75cdfee2009-06-10 17:40:41 +000019from autotest_lib.client.common_lib import logging_manager
Paul Pendlebury57593562011-06-15 10:45:49 -070020from autotest_lib.server import test, subcommand, profilers
mbligh0a883702010-04-21 01:58:34 +000021from autotest_lib.server.hosts import abstract_ssh
jadmanski10646442008-08-13 14:05:21 +000022from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000023
24
mbligh084bc172008-10-18 14:02:45 +000025def _control_segment_path(name):
26 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000027 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000028 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000029
30
# Filenames used inside a job's results directory.
CLIENT_CONTROL_FILENAME = 'control'
SERVER_CONTROL_FILENAME = 'control.srv'
MACHINES_FILENAME = '.machines'

# Pre-resolved paths to the control segment scripts implementing the
# standard server-side job phases (install, verify, repair, ...).
CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
INSTALL_CONTROL_FILE = _control_segment_path('install')
CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
VERIFY_CONTROL_FILE = _control_segment_path('verify')
REPAIR_CONTROL_FILE = _control_segment_path('repair')
PROVISION_CONTROL_FILE = _control_segment_path('provision')
VERIFY_JOB_REPO_URL_CONTROL_FILE = _control_segment_path('verify_job_repo_url')
RESET_CONTROL_FILE = _control_segment_path('reset')
jadmanski10646442008-08-13 14:05:21 +000045
46
mbligh062ed152009-01-13 00:57:14 +000047# by default provide a stub that generates no site data
48def _get_site_job_data_dummy(job):
49 return {}
50
51
class status_indenter(base_job.status_indenter):
    """Provide a simple integer-backed status indenter."""

    def __init__(self):
        self._indent = 0


    @property
    def indent(self):
        """The current indentation level."""
        return self._indent


    def increment(self):
        """Raise the indentation level by one."""
        self._indent += 1


    def decrement(self):
        """Lower the indentation level by one."""
        self._indent -= 1


    def get_context(self):
        """Returns a context object for use by job.get_record_context."""
        indenter = self
        saved_level = self._indent
        class context(object):
            def restore(self):
                # Roll the indenter back to the level captured at creation.
                indenter._indent = saved_level
        return context()
80
81
class server_job_record_hook(object):
    """The job.record hook for server job. Used to inject WARN messages from
    the console or vlm whenever new logs are written, and to echo any logs
    to INFO level logging. Implemented as a class so that it can use state to
    block recursive calls, so that the hook can call job.record itself to
    log WARN messages.

    Depends on job._read_warnings and job._logger.
    """
    def __init__(self, job):
        self._job = job
        self._being_called = False


    def __call__(self, entry):
        """A wrapper around the 'real' record hook, the _hook method, which
        prevents recursion. This isn't making any effort to be threadsafe,
        the intent is to outright block infinite recursion via a
        job.record->_hook->job.record->_hook->job.record... chain."""
        if self._being_called:
            return
        self._being_called = True
        try:
            self._hook(self._job, entry)
        finally:
            self._being_called = False


    @staticmethod
    def _hook(job, entry):
        """The core hook, which can safely call job.record."""
        pending = []
        # drain every warning logger and record the warnings immediately
        for timestamp, msg in job._read_warnings():
            warning = base_job.status_log_entry(
                'WARN', None, None, msg, {}, timestamp=timestamp)
            pending.append(warning)
            job.record_entry(warning)
        # echo rendered versions of the warnings plus the new entry to INFO
        pending.append(entry)
        for item in pending:
            rendered = job._logger.render_entry(item)
            logging.info(rendered)
            job._parse_status(rendered)
jadmanski2a89dac2010-06-11 14:32:58 +0000126
127
mbligh0d0f67d2009-11-06 03:15:03 +0000128class base_server_job(base_job.base_job):
129 """The server-side concrete implementation of base_job.
jadmanski10646442008-08-13 14:05:21 +0000130
mbligh0d0f67d2009-11-06 03:15:03 +0000131 Optional properties provided by this implementation:
132 serverdir
133 conmuxdir
134
135 num_tests_run
136 num_tests_failed
137
138 warning_manager
139 warning_loggers
jadmanski10646442008-08-13 14:05:21 +0000140 """
141
mbligh0d0f67d2009-11-06 03:15:03 +0000142 _STATUS_VERSION = 1
jadmanski10646442008-08-13 14:05:21 +0000143
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass='', test_retry=0,
                 group_name='', tag='', disable_sysinfo=False,
                 control_filename=SERVER_CONTROL_FILENAME):
        """
        Create a server side job object.

        @param control: The pathname of the control file.
        @param args: Passed to the control file.
        @param resultdir: Where to throw the results.
        @param label: Description of the job.
        @param user: Username for the job (email address).
        @param machines: A list of hostnames of the machines to run on.
        @param client: True if this is a client-side control file.
        @param parse_job: string, if supplied it is the job execution tag that
                the results will be passed through to the TKO parser with.
        @param ssh_user: The SSH username.  [root]
        @param ssh_port: The SSH port number.  [22]
        @param ssh_pass: The SSH passphrase, if needed.
        @param test_retry: The number of times to retry a test if the test did
                not complete successfully.
        @param group_name: If supplied, this will be written out as
                host_group_name in the keyvals file for the parser.
        @param tag: The job execution tag from the scheduler.  [optional]
        @param disable_sysinfo: Whether we should disable the sysinfo step of
                tests for a modest shortening of test time.  [optional]
        @param control_filename: The filename where the server control file
                should be written in the results directory.
        """
        super(base_server_job, self).__init__(resultdir=resultdir,
                                              test_retry=test_retry)
        # NOTE(review): 'path' is assigned but never used below; looks
        # vestigial -- confirm before removing.
        path = os.path.dirname(__file__)
        self.test_retry = test_retry
        self.control = control
        # Tracks logs that still await collection from the remote machines.
        self._uncollected_log_file = os.path.join(self.resultdir,
                                                  'uncollected_logs')
        debugdir = os.path.join(self.resultdir, 'debug')
        if not os.path.exists(debugdir):
            os.mkdir(debugdir)

        if user:
            self.user = user
        else:
            self.user = getpass.getuser()

        self.args = args
        self.label = label
        self.machines = machines
        self._client = client
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self._ssh_user = ssh_user
        self._ssh_port = ssh_port
        self._ssh_pass = ssh_pass
        self.tag = tag
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches = False
        self.drop_caches_between_iterations = False
        self._control_filename = control_filename
        self._disable_sysinfo = disable_sysinfo

        # Route stdout/stderr (including raw fds) through the logging
        # manager so all job output is captured.
        self.logging = logging_manager.get_logging_manager(
                manage_stdout_and_stderr=True, redirect_fds=True)
        subcommand.logging_manager_object = self.logging

        self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # Basic metadata about this job, destined for the resultdir keyvals.
        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'drone' : platform.node(),
                    'status_version' : str(self._STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if group_name:
            job_data['host_group_name'] = group_name

        # only write these keyvals out on the first job in a resultdir
        if 'job_started' not in utils.read_keyval(self.resultdir):
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        self._parse_job = parse_job
        # Continuous TKO parsing is only enabled for single-machine jobs.
        self._using_parser = (self._parse_job and len(machines) <= 1)
        self.pkgmgr = packages.PackageManager(
            self.autodir, run_function_dargs={'timeout':600})
        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()

        # these components aren't usable on the server
        self.bootloader = None
        self.harness = None

        # set up the status logger
        self._indenter = status_indenter()
        self._logger = base_job.status_logger(
            self, self._indenter, 'status.log', 'status.log',
            record_hook=server_job_record_hook(self))
244
mbligh0d0f67d2009-11-06 03:15:03 +0000245
246 @classmethod
247 def _find_base_directories(cls):
248 """
249 Determine locations of autodir, clientdir and serverdir. Assumes
250 that this file is located within serverdir and uses __file__ along
251 with relative paths to resolve the location.
252 """
253 serverdir = os.path.abspath(os.path.dirname(__file__))
254 autodir = os.path.normpath(os.path.join(serverdir, '..'))
255 clientdir = os.path.join(autodir, 'client')
256 return autodir, clientdir, serverdir
257
258
Scott Zawalski91493c82013-01-25 16:15:20 -0500259 def _find_resultdir(self, resultdir, *args, **dargs):
mbligh0d0f67d2009-11-06 03:15:03 +0000260 """
261 Determine the location of resultdir. For server jobs we expect one to
262 always be explicitly passed in to __init__, so just return that.
263 """
264 if resultdir:
265 return os.path.normpath(resultdir)
266 else:
267 return None
268
jadmanski550fdc22008-11-20 16:32:08 +0000269
jadmanski2a89dac2010-06-11 14:32:58 +0000270 def _get_status_logger(self):
271 """Return a reference to the status logger."""
272 return self._logger
273
274
jadmanskie432dd22009-01-30 15:04:51 +0000275 @staticmethod
276 def _load_control_file(path):
277 f = open(path)
278 try:
279 control_file = f.read()
280 finally:
281 f.close()
282 return re.sub('\r', '', control_file)
283
284
jadmanski550fdc22008-11-20 16:32:08 +0000285 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000286 """
287 Register some hooks into the subcommand modules that allow us
288 to properly clean up self.hosts created in forked subprocesses.
289 """
jadmanski550fdc22008-11-20 16:32:08 +0000290 def on_fork(cmd):
291 self._existing_hosts_on_fork = set(self.hosts)
292 def on_join(cmd):
293 new_hosts = self.hosts - self._existing_hosts_on_fork
294 for host in new_hosts:
295 host.close()
296 subcommand.subcommand.register_fork_hook(on_fork)
297 subcommand.subcommand.register_join_hook(on_join)
298
jadmanski10646442008-08-13 14:05:21 +0000299
    def init_parser(self):
        """
        Start the continuous parsing of self.resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.

        No-op unless continuous parsing was enabled in __init__
        (self._using_parser).
        """
        if not self._using_parser:
            return
        # redirect parser debugging to .parse.log
        parse_log = os.path.join(self.resultdir, '.parse.log')
        # buffering=0 (Python 2) => unbuffered, so debug output hits disk
        # immediately even if the job dies.
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self._STATUS_VERSION)
        self.job_model = self.parser.make_job(self.resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self._parse_job)
        if job_idx is None:
            self.results_db.insert_job(self._parse_job, self.job_model)
        else:
            # job already present: attach its db indices to our model
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
326
327
328 def cleanup_parser(self):
mbligh2b92b862008-11-22 13:25:32 +0000329 """
330 This should be called after the server job is finished
jadmanski10646442008-08-13 14:05:21 +0000331 to carry out any remaining cleanup (e.g. flushing any
mbligh2b92b862008-11-22 13:25:32 +0000332 remaining test results to the results db)
333 """
mbligh0d0f67d2009-11-06 03:15:03 +0000334 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +0000335 return
336 final_tests = self.parser.end()
337 for test in final_tests:
338 self.__insert_test(test)
mbligh0d0f67d2009-11-06 03:15:03 +0000339 self._using_parser = False
jadmanski10646442008-08-13 14:05:21 +0000340
341
342 def verify(self):
Dan Shi07e09af2013-04-12 09:31:29 -0700343 """Verify machines are all ssh-able."""
jadmanski10646442008-08-13 14:05:21 +0000344 if not self.machines:
mbligh084bc172008-10-18 14:02:45 +0000345 raise error.AutoservError('No machines specified to verify')
mbligh0fce4112008-11-27 00:37:17 +0000346 if self.resultdir:
347 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000348 try:
jadmanskicdd0c402008-09-19 21:21:31 +0000349 namespace = {'machines' : self.machines, 'job' : self,
mbligh0d0f67d2009-11-06 03:15:03 +0000350 'ssh_user' : self._ssh_user,
351 'ssh_port' : self._ssh_port,
352 'ssh_pass' : self._ssh_pass}
mbligh084bc172008-10-18 14:02:45 +0000353 self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000354 except Exception, e:
mbligh2b92b862008-11-22 13:25:32 +0000355 msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
jadmanski10646442008-08-13 14:05:21 +0000356 self.record('ABORT', None, None, msg)
357 raise
358
359
Dan Shi07e09af2013-04-12 09:31:29 -0700360 def reset(self):
361 """Reset machines by first cleanup then verify each machine."""
362 if not self.machines:
363 raise error.AutoservError('No machines specified to reset.')
364 if self.resultdir:
365 os.chdir(self.resultdir)
366
367 try:
368 namespace = {'machines' : self.machines, 'job' : self,
369 'ssh_user' : self._ssh_user,
370 'ssh_port' : self._ssh_port,
371 'ssh_pass' : self._ssh_pass}
372 self._execute_code(RESET_CONTROL_FILE, namespace, protect=False)
373 except Exception as e:
374 msg = ('Reset failed\n' + str(e) + '\n' +
375 traceback.format_exc())
376 self.record('ABORT', None, None, msg)
377 raise
378
379
jadmanski10646442008-08-13 14:05:21 +0000380 def repair(self, host_protection):
381 if not self.machines:
382 raise error.AutoservError('No machines specified to repair')
mbligh0fce4112008-11-27 00:37:17 +0000383 if self.resultdir:
384 os.chdir(self.resultdir)
jadmanski10646442008-08-13 14:05:21 +0000385 namespace = {'machines': self.machines, 'job': self,
mbligh0d0f67d2009-11-06 03:15:03 +0000386 'ssh_user': self._ssh_user, 'ssh_port': self._ssh_port,
387 'ssh_pass': self._ssh_pass,
jadmanski10646442008-08-13 14:05:21 +0000388 'protection_level': host_protection}
mbligh25c0b8c2009-01-24 01:44:17 +0000389
mbligh0931b0a2009-04-08 17:44:48 +0000390 self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
jadmanski10646442008-08-13 14:05:21 +0000391
392
Alex Millercb79ba72013-05-29 14:43:00 -0700393 def provision(self, labels):
394 """
395 Provision all hosts to match |labels|.
396
397 @param labels: A comma seperated string of labels to provision the
398 host to.
399
400 """
401 namespace = {'provision_labels': labels}
402 control = self._load_control_file(PROVISION_CONTROL_FILE)
403 self.run(control=control, namespace=namespace)
404
405
jadmanski10646442008-08-13 14:05:21 +0000406 def precheck(self):
407 """
408 perform any additional checks in derived classes.
409 """
410 pass
411
412
413 def enable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000414 """
415 Start or restart external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000416 """
417 pass
418
419
420 def disable_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000421 """
422 Pause or stop external logging mechanism.
jadmanski10646442008-08-13 14:05:21 +0000423 """
424 pass
425
426
427 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000428 """
429 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000430 """
431 return False
432
433
    def _make_parallel_wrapper(self, function, machines, log):
        """Wrap function as appropriate for calling by parallel_simple.

        @param function: The per-machine callable to wrap.
        @param machines: The list of machines to be processed.
        @param log: Whether per-machine keyvals/parsing should be set up.

        @return: A callable taking a single machine argument.
        """
        # We fork unless the call is for this job's one-and-only machine.
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self._parse_job and is_forking and log:
            def wrapper(machine):
                # Intended to run in a forked subprocess (see the fork hooks
                # registered in _register_subcommand_hooks): narrow this job
                # object to a single machine and start a per-machine parser.
                self._parse_job += "/" + machine
                self._using_parser = True
                self.machines = [machine]
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser()
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            def wrapper(machine):
                # Multi-machine run without TKO parsing: just record the
                # machine identity in the per-machine resultdir.
                self.push_execution_context(machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self._STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            # single-machine, or logging disabled: call through unchanged
            wrapper = function
        return wrapper
461
462
463 def parallel_simple(self, function, machines, log=True, timeout=None,
464 return_results=False):
465 """
466 Run 'function' using parallel_simple, with an extra wrapper to handle
467 the necessary setup for continuous parsing, if possible. If continuous
468 parsing is already properly initialized then this should just work.
469
470 @param function: A callable to run in parallel given each machine.
471 @param machines: A list of machine names to be passed one per subcommand
472 invocation of function.
473 @param log: If True, output will be written to output in a subdirectory
474 named after each machine.
475 @param timeout: Seconds after which the function call should timeout.
476 @param return_results: If True instead of an AutoServError being raised
477 on any error a list of the results|exceptions from the function
478 called on each arg is returned. [default: False]
479
480 @raises error.AutotestError: If any of the functions failed.
481 """
482 wrapper = self._make_parallel_wrapper(function, machines, log)
483 return subcommand.parallel_simple(wrapper, machines,
484 log=log, timeout=timeout,
485 return_results=return_results)
486
487
488 def parallel_on_machines(self, function, machines, timeout=None):
489 """
showardcd5fac42009-07-06 20:19:43 +0000490 @param function: Called in parallel with one machine as its argument.
mbligh415dc212009-06-15 21:53:34 +0000491 @param machines: A list of machines to call function(machine) on.
492 @param timeout: Seconds after which the function call should timeout.
493
494 @returns A list of machines on which function(machine) returned
495 without raising an exception.
496 """
showardcd5fac42009-07-06 20:19:43 +0000497 results = self.parallel_simple(function, machines, timeout=timeout,
mbligh415dc212009-06-15 21:53:34 +0000498 return_results=True)
499 success_machines = []
500 for result, machine in itertools.izip(results, machines):
501 if not isinstance(result, Exception):
502 success_machines.append(machine)
503 return success_machines
jadmanski10646442008-08-13 14:05:21 +0000504
505
    # Sentinel: pass as control_file_dir to request a temporary directory.
    _USE_TEMP_DIR = object()
    def run(self, cleanup=False, install_before=False, install_after=False,
            collect_crashdumps=True, namespace={}, control=None,
            control_file_dir=None, verify_job_repo_url=False,
            only_collect_crashinfo=False, skip_crash_collection=False):
        """
        Execute this job's control file, with crash collection and cleanup.

        @param cleanup: If True, run the cleanup control segment afterwards.
        @param install_before: If True, run the install control segment on the
                machines before executing the control file.
        @param install_after: If True, run the install control segment on the
                machines after executing the control file.
        @param collect_crashdumps: If True, collect crashdumps afterwards.
        @param namespace: Extra names exposed to the control file. Copied
                before use, so the caller's dict -- and the mutable default,
                which is safe here for that reason -- is never modified.
        @param control: Control file text; defaults to the contents of
                self.control.
        @param control_file_dir: Directory to write control files into;
                defaults to self.resultdir, or a temp dir if _USE_TEMP_DIR.
        @param verify_job_repo_url: If True, run the control segment that
                verifies the job_repo_url contains autotest packages.
        @param only_collect_crashinfo: If True, skip running the control file
                and only collect crashinfo (from a prior interrupted run).
        @param skip_crash_collection: If True, skip crash dump/info
                collection entirely.
        """
        # for a normal job, make sure the uncollected logs file exists
        # for a crashinfo-only run it should already exist, bail out otherwise
        created_uncollected_logs = False
        if self.resultdir and not os.path.exists(self._uncollected_log_file):
            if only_collect_crashinfo:
                # if this is a crashinfo-only run, and there were no existing
                # uncollected logs, just bail out early
                logging.info("No existing uncollected logs, "
                             "skipping crashinfo collection")
                return
            else:
                log_file = open(self._uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
                created_uncollected_logs = True

        # use a copy so changes don't affect the original dictionary
        namespace = namespace.copy()
        machines = self.machines
        if control is None:
            if self.control is None:
                control = ''
            else:
                control = self._load_control_file(self.control)
        if control_file_dir is None:
            control_file_dir = self.resultdir

        self.aborted = False
        # expose the standard job context to the control file
        namespace['machines'] = machines
        namespace['args'] = self.args
        namespace['job'] = self
        namespace['ssh_user'] = self._ssh_user
        namespace['ssh_port'] = self._ssh_port
        namespace['ssh_pass'] = self._ssh_pass
        test_start_time = int(time.time())

        if self.resultdir:
            os.chdir(self.resultdir)
            # touch status.log so that the parser knows a job is running here
            open(self.get_status_log_path(), 'a').close()
            self.enable_external_logging()

        # assume we need crashinfo until the control file finishes cleanly
        collect_crashinfo = True
        temp_control_file_dir = None
        try:
            try:
                if install_before and machines:
                    self._execute_code(INSTALL_CONTROL_FILE, namespace)

                if only_collect_crashinfo:
                    return

                # If the verify_job_repo_url option is set but we're unable
                # to actually verify that the job_repo_url contains the autotest
                # package, this job will fail.
                if verify_job_repo_url:
                    self._execute_code(VERIFY_JOB_REPO_URL_CONTROL_FILE,
                                       namespace)
                else:
                    logging.warning('Not checking if job_repo_url contains '
                                    'autotest packages on %s', machines)

                # determine the dir to write the control files to
                cfd_specified = (control_file_dir
                                 and control_file_dir is not self._USE_TEMP_DIR)
                if cfd_specified:
                    temp_control_file_dir = None
                else:
                    temp_control_file_dir = tempfile.mkdtemp(
                        suffix='temp_control_file_dir')
                    control_file_dir = temp_control_file_dir
                server_control_file = os.path.join(control_file_dir,
                                                   self._control_filename)
                client_control_file = os.path.join(control_file_dir,
                                                   CLIENT_CONTROL_FILENAME)
                if self._client:
                    # client-side control file: run it via the client wrapper
                    namespace['control'] = control
                    utils.open_write_close(client_control_file, control)
                    shutil.copyfile(CLIENT_WRAPPER_CONTROL_FILE,
                                    server_control_file)
                else:
                    utils.open_write_close(server_control_file, control)
                logging.info("Processing control file")
                self._execute_code(server_control_file, namespace)
                logging.info("Finished processing control file")

                # no error occured, so we don't need to collect crashinfo
                collect_crashinfo = False
            except Exception, e:
                try:
                    logging.exception(
                            'Exception escaped control file, job aborting:')
                    self.record('INFO', None, None, str(e),
                                {'job_abort_reason': str(e)})
                except:
                    pass # don't let logging exceptions here interfere
                raise
        finally:
            if temp_control_file_dir:
                # Clean up temp directory used for copies of the control files
                try:
                    shutil.rmtree(temp_control_file_dir)
                except Exception, e:
                    logging.warn('Could not remove temp directory %s: %s',
                                 temp_control_file_dir, e)

            if machines and (collect_crashdumps or collect_crashinfo):
                namespace['test_start_time'] = test_start_time
                if skip_crash_collection:
                    logging.info('Skipping crash dump/info collection '
                                 'as requested.')
                elif collect_crashinfo:
                    # includes crashdumps
                    self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
                else:
                    self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
            self.disable_external_logging()
            if cleanup and machines:
                self._execute_code(CLEANUP_CONTROL_FILE, namespace)
            # only remove the uncollected-logs file if this run created it
            if self._uncollected_log_file and created_uncollected_logs:
                os.remove(self._uncollected_log_file)
            if install_after and machines:
                self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000634
635
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        @param url: url of the test to run.
        @param args: positional args passed through to the test.
        @param dargs: keyword args passed through to the test; a 'tag' entry
                (tag to add to testname) is consumed when building the tagged
                test name.

        @return: True if the test completed without raising, False if it
                raised a TestBaseException.
        @raises Exception: re-raised if the test raised anything other than
                a TestBaseException.
        """
        if self._disable_sysinfo:
            dargs['disable_sysinfo'] = True

        group, testname = self.pkgmgr.get_package_name(url, 'test')
        testname, subdir, tag = self._build_tagged_test_name(testname, dargs)
        # NOTE(review): outputdir is unused here; _make_test_outputdir is
        # presumably called for its directory-creating side effect -- confirm.
        outputdir = self._make_test_outputdir(subdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                # test-level failures are recorded with their own exit status
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            return False
        elif exc_info:
            # re-raise non-test exceptions with their original traceback
            # (Python 2 three-argument raise)
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000672
673
674 def _run_group(self, name, subdir, function, *args, **dargs):
675 """\
676 Underlying method for running something inside of a group.
677 """
jadmanskide292df2008-08-26 20:51:14 +0000678 result, exc_info = None, None
jadmanski10646442008-08-13 14:05:21 +0000679 try:
680 self.record('START', subdir, name)
jadmanski52053632010-06-11 21:08:10 +0000681 result = function(*args, **dargs)
jadmanski10646442008-08-13 14:05:21 +0000682 except error.TestBaseException, e:
jadmanskib88d6dc2009-01-10 00:33:18 +0000683 self.record("END %s" % e.exit_status, subdir, name)
jadmanskide292df2008-08-26 20:51:14 +0000684 exc_info = sys.exc_info()
jadmanski10646442008-08-13 14:05:21 +0000685 except Exception, e:
686 err_msg = str(e) + '\n'
687 err_msg += traceback.format_exc()
688 self.record('END ABORT', subdir, name, err_msg)
689 raise error.JobError(name + ' failed\n' + traceback.format_exc())
690 else:
691 self.record('END GOOD', subdir, name)
692
jadmanskide292df2008-08-26 20:51:14 +0000693 return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000694
695
696 def run_group(self, function, *args, **dargs):
697 """\
698 function:
699 subroutine to run
700 *args:
701 arguments for the function
702 """
703
704 name = function.__name__
705
706 # Allow the tag for the group to be specified.
707 tag = dargs.pop('tag', None)
708 if tag:
709 name = tag
710
jadmanskide292df2008-08-26 20:51:14 +0000711 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000712
713
714 def run_reboot(self, reboot_func, get_kernel_func):
715 """\
716 A specialization of run_group meant specifically for handling
717 a reboot. Includes support for capturing the kernel version
718 after the reboot.
719
720 reboot_func: a function that carries out the reboot
721
722 get_kernel_func: a function that returns a string
723 representing the kernel version.
724 """
jadmanski10646442008-08-13 14:05:21 +0000725 try:
726 self.record('START', None, 'reboot')
jadmanski10646442008-08-13 14:05:21 +0000727 reboot_func()
728 except Exception, e:
jadmanski10646442008-08-13 14:05:21 +0000729 err_msg = str(e) + '\n' + traceback.format_exc()
730 self.record('END FAIL', None, 'reboot', err_msg)
jadmanski4b51d542009-04-08 14:17:16 +0000731 raise
jadmanski10646442008-08-13 14:05:21 +0000732 else:
733 kernel = get_kernel_func()
jadmanski10646442008-08-13 14:05:21 +0000734 self.record('END GOOD', None, 'reboot',
Dale Curtis74a314b2011-06-23 14:55:46 -0700735 optional_fields={"kernel": kernel})
jadmanski10646442008-08-13 14:05:21 +0000736
737
jadmanskie432dd22009-01-30 15:04:51 +0000738 def run_control(self, path):
739 """Execute a control file found at path (relative to the autotest
740 path). Intended for executing a control file within a control file,
741 not for running the top-level job control file."""
742 path = os.path.join(self.autodir, path)
743 control_file = self._load_control_file(path)
mbligh0d0f67d2009-11-06 03:15:03 +0000744 self.run(control=control_file, control_file_dir=self._USE_TEMP_DIR)
jadmanskie432dd22009-01-30 15:04:51 +0000745
746
jadmanskic09fc152008-10-15 17:56:59 +0000747 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000748 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000749 on_every_test)
750
751
752 def add_sysinfo_logfile(self, file, on_every_test=False):
753 self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
754
755
756 def _add_sysinfo_loggable(self, loggable, on_every_test):
757 if on_every_test:
758 self.sysinfo.test_loggables.add(loggable)
759 else:
760 self.sysinfo.boot_loggables.add(loggable)
761
762
jadmanski10646442008-08-13 14:05:21 +0000763 def _read_warnings(self):
jadmanskif37df842009-02-11 00:03:26 +0000764 """Poll all the warning loggers and extract any new warnings that have
765 been logged. If the warnings belong to a category that is currently
766 disabled, this method will discard them and they will no longer be
767 retrievable.
768
769 Returns a list of (timestamp, message) tuples, where timestamp is an
770 integer epoch timestamp."""
jadmanski10646442008-08-13 14:05:21 +0000771 warnings = []
772 while True:
773 # pull in a line of output from every logger that has
774 # output ready to be read
mbligh2b92b862008-11-22 13:25:32 +0000775 loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
jadmanski10646442008-08-13 14:05:21 +0000776 closed_loggers = set()
777 for logger in loggers:
778 line = logger.readline()
779 # record any broken pipes (aka line == empty)
780 if len(line) == 0:
781 closed_loggers.add(logger)
782 continue
jadmanskif37df842009-02-11 00:03:26 +0000783 # parse out the warning
784 timestamp, msgtype, msg = line.split('\t', 2)
785 timestamp = int(timestamp)
786 # if the warning is valid, add it to the results
787 if self.warning_manager.is_valid(timestamp, msgtype):
788 warnings.append((timestamp, msg.strip()))
jadmanski10646442008-08-13 14:05:21 +0000789
790 # stop listening to loggers that are closed
791 self.warning_loggers -= closed_loggers
792
793 # stop if none of the loggers have any output left
794 if not loggers:
795 break
796
797 # sort into timestamp order
798 warnings.sort()
799 return warnings
800
801
showardcc929362010-01-25 21:20:41 +0000802 def _unique_subdirectory(self, base_subdirectory_name):
803 """Compute a unique results subdirectory based on the given name.
804
805 Appends base_subdirectory_name with a number as necessary to find a
806 directory name that doesn't already exist.
807 """
808 subdirectory = base_subdirectory_name
809 counter = 1
810 while os.path.exists(os.path.join(self.resultdir, subdirectory)):
811 subdirectory = base_subdirectory_name + '.' + str(counter)
812 counter += 1
813 return subdirectory
814
815
jadmanski52053632010-06-11 21:08:10 +0000816 def get_record_context(self):
817 """Returns an object representing the current job.record context.
818
819 The object returned is an opaque object with a 0-arg restore method
820 which can be called to restore the job.record context (i.e. indentation)
821 to the current level. The intention is that it should be used when
822 something external which generate job.record calls (e.g. an autotest
823 client) can fail catastrophically and the server job record state
824 needs to be reset to its original "known good" state.
825
826 @return: A context object with a 0-arg restore() method."""
827 return self._indenter.get_context()
828
829
showardcc929362010-01-25 21:20:41 +0000830 def record_summary(self, status_code, test_name, reason='', attributes=None,
831 distinguishing_attributes=(), child_test_ids=None):
832 """Record a summary test result.
833
834 @param status_code: status code string, see
835 common_lib.log.is_valid_status()
836 @param test_name: name of the test
837 @param reason: (optional) string providing detailed reason for test
838 outcome
839 @param attributes: (optional) dict of string keyvals to associate with
840 this result
841 @param distinguishing_attributes: (optional) list of attribute names
842 that should be used to distinguish identically-named test
843 results. These attributes should be present in the attributes
844 parameter. This is used to generate user-friendly subdirectory
845 names.
846 @param child_test_ids: (optional) list of test indices for test results
847 used in generating this result.
848 """
849 subdirectory_name_parts = [test_name]
850 for attribute in distinguishing_attributes:
851 assert attributes
852 assert attribute in attributes, '%s not in %s' % (attribute,
853 attributes)
854 subdirectory_name_parts.append(attributes[attribute])
855 base_subdirectory_name = '.'.join(subdirectory_name_parts)
856
857 subdirectory = self._unique_subdirectory(base_subdirectory_name)
858 subdirectory_path = os.path.join(self.resultdir, subdirectory)
859 os.mkdir(subdirectory_path)
860
861 self.record(status_code, subdirectory, test_name,
862 status=reason, optional_fields={'is_summary': True})
863
864 if attributes:
865 utils.write_keyval(subdirectory_path, attributes)
866
867 if child_test_ids:
868 ids_string = ','.join(str(test_id) for test_id in child_test_ids)
869 summary_data = {'child_test_ids': ids_string}
870 utils.write_keyval(os.path.join(subdirectory_path, 'summary_data'),
871 summary_data)
872
873
jadmanski16a7ff72009-04-01 18:19:53 +0000874 def disable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000875 self.warning_manager.disable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000876 self.record("INFO", None, None,
877 "disabling %s warnings" % warning_type,
878 {"warnings.disable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000879
880
jadmanski16a7ff72009-04-01 18:19:53 +0000881 def enable_warnings(self, warning_type):
jadmanskif37df842009-02-11 00:03:26 +0000882 self.warning_manager.enable_warnings(warning_type)
jadmanski16a7ff72009-04-01 18:19:53 +0000883 self.record("INFO", None, None,
884 "enabling %s warnings" % warning_type,
885 {"warnings.enable": warning_type})
jadmanskif37df842009-02-11 00:03:26 +0000886
887
jadmanski779bd292009-03-19 17:33:33 +0000888 def get_status_log_path(self, subdir=None):
889 """Return the path to the job status log.
890
891 @param subdir - Optional paramter indicating that you want the path
892 to a subdirectory status log.
893
894 @returns The path where the status log should be.
895 """
mbligh210bae62009-04-01 18:33:13 +0000896 if self.resultdir:
897 if subdir:
898 return os.path.join(self.resultdir, subdir, "status.log")
899 else:
900 return os.path.join(self.resultdir, "status.log")
jadmanski779bd292009-03-19 17:33:33 +0000901 else:
mbligh210bae62009-04-01 18:33:13 +0000902 return None
jadmanski779bd292009-03-19 17:33:33 +0000903
904
jadmanski6bb32d72009-03-19 20:25:24 +0000905 def _update_uncollected_logs_list(self, update_func):
906 """Updates the uncollected logs list in a multi-process safe manner.
907
908 @param update_func - a function that updates the list of uncollected
909 logs. Should take one parameter, the list to be updated.
910 """
Dan Shi07e09af2013-04-12 09:31:29 -0700911 # Skip log collection if file _uncollected_log_file does not exist.
912 if not (self._uncollected_log_file and
913 os.path.exists(self._uncollected_log_file)):
914 return
mbligh0d0f67d2009-11-06 03:15:03 +0000915 if self._uncollected_log_file:
916 log_file = open(self._uncollected_log_file, "r+")
mbligha788dc42009-03-26 21:10:16 +0000917 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000918 try:
919 uncollected_logs = pickle.load(log_file)
920 update_func(uncollected_logs)
921 log_file.seek(0)
922 log_file.truncate()
923 pickle.dump(uncollected_logs, log_file)
jadmanski3bff9092009-04-22 18:09:47 +0000924 log_file.flush()
jadmanski6bb32d72009-03-19 20:25:24 +0000925 finally:
926 fcntl.flock(log_file, fcntl.LOCK_UN)
927 log_file.close()
928
929
930 def add_client_log(self, hostname, remote_path, local_path):
931 """Adds a new set of client logs to the list of uncollected logs,
932 to allow for future log recovery.
933
934 @param host - the hostname of the machine holding the logs
935 @param remote_path - the directory on the remote machine holding logs
936 @param local_path - the local directory to copy the logs into
937 """
938 def update_func(logs_list):
939 logs_list.append((hostname, remote_path, local_path))
940 self._update_uncollected_logs_list(update_func)
941
942
943 def remove_client_log(self, hostname, remote_path, local_path):
944 """Removes a set of client logs from the list of uncollected logs,
945 to allow for future log recovery.
946
947 @param host - the hostname of the machine holding the logs
948 @param remote_path - the directory on the remote machine holding logs
949 @param local_path - the local directory to copy the logs into
950 """
951 def update_func(logs_list):
952 logs_list.remove((hostname, remote_path, local_path))
953 self._update_uncollected_logs_list(update_func)
954
955
mbligh0d0f67d2009-11-06 03:15:03 +0000956 def get_client_logs(self):
957 """Retrieves the list of uncollected logs, if it exists.
958
959 @returns A list of (host, remote_path, local_path) tuples. Returns
960 an empty list if no uncollected logs file exists.
961 """
962 log_exists = (self._uncollected_log_file and
963 os.path.exists(self._uncollected_log_file))
964 if log_exists:
965 return pickle.load(open(self._uncollected_log_file))
966 else:
967 return []
968
969
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # __import__ with a non-empty fromlist returns the leaf module;
            # with an empty one it returns the top-level package
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # walk down to the leaf submodule by hand
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys, logging=logging)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
        # propagate the job's ssh settings to any hosts the control file makes
        namespace['hosts'].factory.ssh_user = self._ssh_user
        namespace['hosts'].factory.ssh_port = self._ssh_port
        namespace['hosts'].factory.ssh_pass = self._ssh_pass
mbligh084bc172008-10-18 14:02:45 +00001055
1056
    def _execute_code(self, code_file, namespace, protect=True):
        """
        Execute code using a copy of namespace as a server control script.

        Unless protect_namespace is explicitly set to False, the dict will not
        be modified.

        Args:
          code_file: The filename of the control file to execute.
          namespace: A dict containing names to make available during execution.
          protect: Boolean.  If True (the default) a copy of the namespace dict
              is used during execution to prevent the code from modifying its
              contents outside of this function.  If False the raw dict is
              passed in and modifications will be allowed.
        """
        if protect:
            namespace = namespace.copy()
        self._fill_server_control_namespace(namespace, protect=protect)
        # TODO: Simplify and get rid of the special cases for only 1 machine.
        if len(self.machines) > 1:
            machines_text = '\n'.join(self.machines) + '\n'
            # Only rewrite the file if it does not match our machine list.
            try:
                machines_f = open(MACHINES_FILENAME, 'r')
                existing_machines_text = machines_f.read()
                machines_f.close()
            except EnvironmentError:
                # a missing/unreadable machines file never matches, so it
                # gets (re)written below
                existing_machines_text = None
            if machines_text != existing_machines_text:
                utils.open_write_close(MACHINES_FILENAME, machines_text)
        # run the control file with namespace as both globals and locals so
        # its assignments are visible back in namespace
        execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +00001088
1089
jadmanskie29d0e42010-06-17 16:06:52 +00001090 def _parse_status(self, new_line):
mbligh0d0f67d2009-11-06 03:15:03 +00001091 if not self._using_parser:
jadmanski10646442008-08-13 14:05:21 +00001092 return
jadmanskie29d0e42010-06-17 16:06:52 +00001093 new_tests = self.parser.process_lines([new_line])
jadmanski10646442008-08-13 14:05:21 +00001094 for test in new_tests:
1095 self.__insert_test(test)
1096
1097
    def __insert_test(self, test):
        """
        An internal method to insert a new test result into the
        database. This method will not raise an exception, even if an
        error occurs during the insert, to avoid failing a test
        simply because of unexpected database issues."""
        # keep the job's pass/fail counters in sync with the parsed results
        self.num_tests_run += 1
        if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
            self.num_tests_failed += 1
        try:
            self.results_db.insert_test(self.job_model, test)
        except Exception:
            # deliberately best-effort: a database hiccup must not turn a
            # finished test run into a failure, so just report to stderr
            msg = ("WARNING: An unexpected error occured while "
                   "inserting test results into the database. "
                   "Ignoring error.\n" + traceback.format_exc())
            print >> sys.stderr, msg
1114
mblighcaa62c22008-04-07 21:51:17 +00001115
mblighfc3da5b2010-01-06 18:37:22 +00001116 def preprocess_client_state(self):
1117 """
1118 Produce a state file for initializing the state of a client job.
1119
1120 Creates a new client state file with all the current server state, as
1121 well as some pre-set client state.
1122
1123 @returns The path of the file the state was written into.
1124 """
1125 # initialize the sysinfo state
1126 self._state.set('client', 'sysinfo', self.sysinfo.serialize())
1127
1128 # dump the state out to a tempfile
1129 fd, file_path = tempfile.mkstemp(dir=self.tmpdir)
1130 os.close(fd)
mbligha2c99492010-01-27 22:59:50 +00001131
1132 # write_to_file doesn't need locking, we exclusively own file_path
mblighfc3da5b2010-01-06 18:37:22 +00001133 self._state.write_to_file(file_path)
1134 return file_path
1135
1136
1137 def postprocess_client_state(self, state_path):
1138 """
1139 Update the state of this job with the state from a client job.
1140
1141 Updates the state of the server side of a job with the final state
1142 of a client job that was run. Updates the non-client-specific state,
1143 pulls in some specific bits from the client-specific state, and then
1144 discards the rest. Removes the state file afterwards
1145
1146 @param state_file A path to the state file from the client.
1147 """
1148 # update the on-disk state
mblighfc3da5b2010-01-06 18:37:22 +00001149 try:
jadmanskib6e7bdb2010-04-13 16:00:39 +00001150 self._state.read_from_file(state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001151 os.remove(state_path)
mbligha2c99492010-01-27 22:59:50 +00001152 except OSError, e:
mblighfc3da5b2010-01-06 18:37:22 +00001153 # ignore file-not-found errors
1154 if e.errno != errno.ENOENT:
1155 raise
jadmanskib6e7bdb2010-04-13 16:00:39 +00001156 else:
1157 logging.debug('Client state file %s not found', state_path)
mblighfc3da5b2010-01-06 18:37:22 +00001158
1159 # update the sysinfo state
1160 if self._state.has('client', 'sysinfo'):
1161 self.sysinfo.deserialize(self._state.get('client', 'sysinfo'))
1162
1163 # drop all the client-specific state
1164 self._state.discard_namespace('client')
1165
1166
mbligh0a883702010-04-21 01:58:34 +00001167 def clear_all_known_hosts(self):
1168 """Clears known hosts files for all AbstractSSHHosts."""
1169 for host in self.hosts:
1170 if isinstance(host, abstract_ssh.AbstractSSHHost):
1171 host.clear_known_hosts()
1172
1173
class warning_manager(object):
    """Class for controlling warning logs. Manages the enabling and disabling
    of warnings."""
    def __init__(self):
        # warning type -> list of (start, end) epoch intervals during which
        # the type was disabled; end is None while it is still disabled
        self.disabled_warnings = {}


    def is_valid(self, timestamp, warning_type):
        """Indicates if a warning (based on the time it occured and its type)
        is a valid warning. A warning is considered "invalid" if this type of
        warning was marked as "disabled" at the time the warning occured."""
        for interval in self.disabled_warnings.get(warning_type, []):
            start, end = interval
            if start <= timestamp and (end is None or timestamp < end):
                return False
        return True


    def disable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, disables all further warnings of this type."""
        intervals = self.disabled_warnings.setdefault(warning_type, [])
        currently_disabled = intervals and intervals[-1][1] is None
        if not currently_disabled:
            # open a new disabled interval starting now
            intervals.append((int(current_time_func()), None))


    def enable_warnings(self, warning_type, current_time_func=time.time):
        """As of now, enables all further warnings of this type."""
        intervals = self.disabled_warnings.get(warning_type, [])
        if intervals and intervals[-1][1] is None:
            # close the currently-open disabled interval at the present time
            start, _ = intervals[-1]
            intervals[-1] = (start, int(current_time_func()))
Paul Pendlebury57593562011-06-15 10:45:49 -07001205
1206
# load up site-specific code for generating site-specific job data
get_site_job_data = utils.import_site_function(__file__,
        "autotest_lib.server.site_server_job", "get_site_job_data",
        _get_site_job_data_dummy)


# use the site-specific server_job subclass if one is provided, falling
# back to base_server_job otherwise
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
1216
1217
class server_job(site_server_job):
    """The concrete server job class used by autoserv; site_server_job
    already layers any site-specific behavior over base_server_job."""
    pass