blob: efe7cfeed15d620e82463fccf534412cb338e631 [file] [log] [blame]
mbligh67647152008-11-19 00:18:14 +00001# Copyright Martin J. Bligh, Google Inc 2008
2# Released under the GPL v2
3
"""
This class allows you to communicate with the frontend to submit jobs etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries into real objects that are more idiomatic.

For docs, see:
    http://autotest/afe/server/rpc_doc/
    http://autotest/new_tko/server/rpc_doc/
    http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
"""
16
mblighdb59e3c2009-11-21 01:45:18 +000017import getpass, os, time, traceback, re
mbligh67647152008-11-19 00:18:14 +000018import common
19from autotest_lib.frontend.afe import rpc_client_lib
mbligh37eceaa2008-12-15 22:56:37 +000020from autotest_lib.client.common_lib import global_config
mbligh67647152008-11-19 00:18:14 +000021from autotest_lib.client.common_lib import utils
mbligh4e576612008-12-22 14:56:36 +000022try:
23 from autotest_lib.server.site_common import site_utils as server_utils
24except:
25 from autotest_lib.server import utils as server_utils
26form_ntuples_from_machines = server_utils.form_ntuples_from_machines
mbligh67647152008-11-19 00:18:14 +000027
# Shared configuration object exported by the global_config module; queried
# below for the SERVER/hostname setting.
GLOBAL_CONFIG = global_config.global_config
# Frontend hostname of last resort: used by RpcClient.__init__ as the default
# for the SERVER/hostname config lookup when no server argument is given and
# the AUTOTEST_WEB environment variable is not set.
DEFAULT_SERVER = 'autotest'
30
def dump_object(header, obj):
    """
    Render a frontend object (eg job, host, acl, label) as human-readable
    text for debugging.

    The result is the header line followed by one right-aligned
    'key: value' line for every entry of obj.hash, leaving out the 'afe'
    and 'hash' entries.
    """
    skipped = ('afe', 'hash')
    pieces = [header + '\n']
    for name in obj.hash:
        if name not in skipped:
            pieces.append('%20s: %s\n' % (name, obj.hash[name]))
    return ''.join(pieces)
42
43
class RpcClient(object):
    """
    Abstract RPC class for communicating with the autotest frontend.
    Both the TKO and the AFE clients inherit from it.

    Constructors for frontend objects live on the afe / tko subclasses;
    methods that manipulate an object live on the object classes themselves.
    """
    def __init__(self, path, user, server, print_log, debug, reply_debug):
        """
        Create a connection (RPC proxy) to one frontend endpoint.

        path: URL path of the RPC endpoint, appended to 'http://<server>'
        user: username to connect as; when empty, getpass.getuser() is used
        server: frontend server to connect to; when empty, the AUTOTEST_WEB
                environment variable is used if present, otherwise the
                SERVER/hostname config value (default DEFAULT_SERVER)
        print_log: print a logging message to stdout on every operation
        debug: print the server URL, the headers and every outgoing call
        reply_debug: print the result of every RPC call
        """
        user = user or getpass.getuser()
        if not server:
            try:
                server = os.environ['AUTOTEST_WEB']
            except KeyError:
                server = GLOBAL_CONFIG.get_config_value(
                        'SERVER', 'hostname', default=DEFAULT_SERVER)

        self.user = user
        self.server = server
        self.print_log = print_log
        self.debug = debug
        self.reply_debug = reply_debug

        base_url = 'http://' + server
        auth_headers = rpc_client_lib.authorization_headers(user, base_url)
        endpoint = base_url + path
        if debug:
            print('SERVER: %s' % endpoint)
            print('HEADERS: %s' % auth_headers)
        self.proxy = rpc_client_lib.get_proxy(endpoint, headers=auth_headers)


    def run(self, call, **dargs):
        """
        Make an RPC call to the server and return its result after passing
        it through utils.strip_unicode().

        call: name of the remote method
        dargs: keyword arguments for the remote method

        Any exception is reported on stdout and then re-raised unchanged.
        """
        remote_method = getattr(self.proxy, call)
        if self.debug:
            print('DEBUG: %s %s' % (call, dargs))
        try:
            reply = utils.strip_unicode(remote_method(**dargs))
            if self.reply_debug:
                print(reply)
            return reply
        except Exception:
            print('FAILED RPC CALL: %s %s' % (call, dargs))
            raise


    def log(self, message):
        """Print message to stdout, but only when print_log is enabled."""
        if not self.print_log:
            return
        print(message)
103
104
class Planner(RpcClient):
    """RPC client bound to the '/planner/server/rpc/' endpoint."""
    def __init__(self, user=None, server=None, print_log=True, debug=False,
                 reply_debug=False):
        """
        All arguments are handed to RpcClient.__init__ unchanged; only the
        endpoint path is fixed here.
        """
        super(Planner, self).__init__('/planner/server/rpc/', user, server,
                                      print_log, debug, reply_debug)
114
115
class TKO(RpcClient):
    """RPC client bound to the '/new_tko/server/rpc/' endpoint."""
    def __init__(self, user=None, server=None, print_log=True, debug=False,
                 reply_debug=False):
        """
        All arguments are handed to RpcClient.__init__ unchanged; only the
        endpoint path is fixed here.
        """
        super(TKO, self).__init__('/new_tko/server/rpc/', user, server,
                                  print_log, debug, reply_debug)


    def get_status_counts(self, job, **data):
        """
        Fetch status counts for a job, grouped by hostname, test name and
        reason, and wrap each returned group in a TestStatus object.

        job: job identifier; results whose job tag starts with '<job>-'
             are selected
        data: extra keyword filters passed to the 'get_status_counts' RPC
        """
        reply = self.run('get_status_counts',
                         group_by=['hostname', 'test_name', 'reason'],
                         job_tag__startswith='%s-' % job, **data)
        statuses = []
        for group in reply['groups']:
            statuses.append(TestStatus(self, group))
        return statuses
mblighc31e4022008-12-11 19:32:30 +0000132
133
class AFE(RpcClient):
    """
    RPC client bound to the '/afe/server/rpc/' endpoint.

    Wraps the raw RPC calls in methods returning frontend objects (Host,
    Label, Acl, User, Job, ...) and adds helpers to launch test suites and
    poll for their results.
    """
    def __init__(self, user=None, server=None, print_log=True, debug=False,
                 reply_debug=False, job=None):
        """
        job: optional job object of the metajob this code is running in;
             when set, set_platform_results() reports through job.record()
        The remaining arguments are handed to RpcClient.__init__.
        """
        self.job = job
        super(AFE, self).__init__(path='/afe/server/rpc/',
                                  user=user,
                                  server=server,
                                  print_log=print_log,
                                  debug=debug,
                                  reply_debug=reply_debug)


    def host_statuses(self, live=None):
        """
        Return host status names.

        live: True  - all statuses reported by the server except dead ones
              False - only the dead statuses
              otherwise (default) - all statuses reported by the server
        """
        dead_statuses = ['Repair Failed', 'Repairing']
        statuses = self.run('get_static_data')['host_statuses']
        if live == True:
            return list(set(statuses) - set(dead_statuses))
        if live == False:
            return dead_statuses
        return statuses


    @staticmethod
    def _dict_for_host_query(hostnames=(), status=None, label=None):
        """
        Build the keyword filters for a host query.  Only arguments with a
        true value contribute: hostnames -> 'hostname__in',
        status -> 'status', label -> 'labels__name'.
        """
        query_args = {}
        if hostnames:
            query_args['hostname__in'] = hostnames
        if status:
            query_args['status'] = status
        if label:
            query_args['labels__name'] = label
        return query_args


    def get_hosts(self, hostnames=(), status=None, label=None, **dargs):
        """
        Query the server for hosts; returns a list of Host objects.

        hostnames, status, label: optional filters, see _dict_for_host_query
        dargs: further filters passed to the 'get_hosts' RPC; on a key clash
               the named filters above take precedence
        """
        query_args = dict(dargs)
        query_args.update(self._dict_for_host_query(hostnames=hostnames,
                                                    status=status,
                                                    label=label))
        hosts = self.run('get_hosts', **query_args)
        return [Host(self, h) for h in hosts]


    def get_hostnames(self, status=None, label=None, **dargs):
        """Like get_hosts() but returns hostnames instead of Host objects."""
        # This implementation can be replaced with a more efficient one
        # that does not query for entire host objects in the future.
        return [host_obj.hostname for host_obj in
                self.get_hosts(status=status, label=label, **dargs)]


    def reverify_hosts(self, hostnames=(), status=None, label=None):
        """
        Call the 'reverify_hosts' RPC for the hosts matching the filters and
        return its result.  The query is always restricted to locked=False
        and aclgroup__users__login=<self.user>.
        """
        query_args = dict(locked=False,
                          aclgroup__users__login=self.user)
        query_args.update(self._dict_for_host_query(hostnames=hostnames,
                                                    status=status,
                                                    label=label))
        return self.run('reverify_hosts', **query_args)


    def create_host(self, hostname, **dargs):
        """Add a host via the 'add_host' RPC; returns the new Host object."""
        host_id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=host_id)[0]


    def set_host_attribute(self, attr, val, **dargs):
        """
        Call the 'set_host_attribute' RPC with attribute=attr, value=val;
        dargs are passed through as additional keyword arguments.
        """
        self.run('set_host_attribute', attribute=attr, value=val, **dargs)


    def get_labels(self, **dargs):
        """Query labels matching dargs; returns a list of Label objects."""
        labels = self.run('get_labels', **dargs)
        return [Label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        """Add a label via 'add_label'; returns the new Label object."""
        label_id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=label_id)[0]


    def get_acls(self, **dargs):
        """Query ACL groups matching dargs; returns a list of Acl objects."""
        acls = self.run('get_acl_groups', **dargs)
        return [Acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        """Add an ACL group via 'add_acl_group'; returns the new Acl."""
        acl_id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=acl_id)[0]


    def get_users(self, **dargs):
        """Query users matching dargs; returns a list of User objects."""
        users = self.run('get_users', **dargs)
        return [User(self, u) for u in users]


    def generate_control_file(self, tests, **dargs):
        """
        Ask the server to generate a control file for the given tests;
        returns the reply wrapped in a ControlFile object.
        """
        ret = self.run('generate_control_file', tests=tests, **dargs)
        return ControlFile(self, ret)


    def get_jobs(self, summary=False, **dargs):
        """
        Query jobs matching dargs; returns a list of Job objects.

        summary: use the 'get_jobs_summary' RPC instead of 'get_jobs'

        Each Job also gets default bookkeeping attributes: testname (the
        job name up to its first whitespace), platform_results and
        platform_reasons (both empty dicts).
        """
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        jobs = []
        for j in jobs_data:
            job = Job(self, j)
            # Set up some extra information defaults
            job.testname = re.sub(r'\s.*', '', job.name) # arbitrary default
            job.platform_results = {}
            job.platform_reasons = {}
            jobs.append(job)
        return jobs


    def get_host_queue_entries(self, **data):
        """
        Query host queue entries matching data; returns a list of JobStatus
        objects, keeping only entries that have a host or a meta_host.
        """
        entries = self.run('get_host_queue_entries', **data)
        job_statuses = [JobStatus(self, e) for e in entries]

        # Sadly, get_host_queue_entries doesn't return platforms, we have
        # to get those back from an explicit get_hosts query, then patch
        # the new host objects back into the host list.
        hostnames = [s.host.hostname for s in job_statuses if s.host]
        host_hash = {}
        for host in self.get_hosts(hostname__in=hostnames):
            host_hash[host.hostname] = host
        for status in job_statuses:
            if status.host:
                status.host = host_hash[status.host.hostname]
        # filter job statuses that have either host or meta_host
        return [status for status in job_statuses if (status.host or
                                                      status.meta_host)]


    def create_job_by_test(self, tests, kernel=None, use_container=False,
                           kernel_cmdline=None, **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it.

        @param tests: Tests to generate the control file for.
        @param kernel: A comma separated list of kernel versions to boot.
        @param use_container: Passed to generate_control_file().
        @param kernel_cmdline: The command line used to boot all kernels listed
                in the kernel parameter.
        @param dargs: Passed to create_job(); must contain either 'hosts',
                or both 'atomic_group_name' and 'synch_count'.

        @returns The created job object, or None when 'hosts' holds fewer
                hosts than the synch_count requires.
        """
        assert ('hosts' in dargs or
                'atomic_group_name' in dargs and 'synch_count' in dargs)
        if kernel:
            kernel_list = re.split(r'[\s,]+', kernel.strip())
            kernel_info = []
            for version in kernel_list:
                kernel_dict = {'version': version}
                if kernel_cmdline is not None:
                    kernel_dict['cmdline'] = kernel_cmdline
                kernel_info.append(kernel_dict)
        else:
            kernel_info = None
        control_file = self.generate_control_file(
                tests=tests, kernel=kernel_info, use_container=use_container)
        if control_file.is_server:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = (dargs.get('dependencies', []) +
                                 control_file.dependencies)
        dargs['control_file'] = control_file.control_file
        # A missing or false synch_count falls back to the control file's.
        if not dargs.get('synch_count', None):
            dargs['synch_count'] = control_file.synch_count
        if 'hosts' in dargs and len(dargs['hosts']) < dargs['synch_count']:
            # will not be able to satisfy this request
            return None
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        """
        Submit a job via the 'create_job' RPC; returns the new Job object.
        """
        job_id = self.run('create_job', name=name, priority=priority,
                          control_file=control_file,
                          control_type=control_type, **dargs)
        return self.get_jobs(id=job_id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label=None,
                        priority='Medium', wait=True, poll_interval=10,
                        email_from=None, email_to=None, timeout=168,
                        max_runtime_hrs=168, kernel_cmdline=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        @param pairings: List of MachineTestPairing objects to invoke.
        @param kernel: Name of the kernel to run.
        @param kernel_label: Label (string) of the kernel to run such as
                    '<kernel-version> : <config> : <date>'
                    If any pairing object has its job_label attribute set it
                    will override this value for that particular job.
        @param priority: Job priority, passed to invoke_test().
        @param kernel_cmdline: The command line to boot the kernel(s) with.
        @param wait: boolean - Wait for the results to come back?
        @param poll_interval: Interval between polling for job results (in mins)
        @param email_from: Send notification email upon completion from here.
        @param email_to: Send notification email upon completion to here.
        @param timeout: Passed to invoke_test() as a job keyword argument.
        @param max_runtime_hrs: Passed to invoke_test() as a job keyword
                    argument.

        @returns None when wait is false or no job could be created;
                otherwise the first non-None result of poll_all_jobs().
        """
        jobs = []
        for pairing in pairings:
            try:
                new_job = self.invoke_test(pairing, kernel, kernel_label,
                                           priority, timeout=timeout,
                                           kernel_cmdline=kernel_cmdline,
                                           max_runtime_hrs=max_runtime_hrs)
                if not new_job:
                    continue
                jobs.append(new_job)
            except Exception:
                # Best effort: one failing pairing must not stop the others.
                traceback.print_exc()
        if not wait or not jobs:
            return
        tko = TKO()
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(tko, jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print, if email data
        is provided, will send email for it as well.

            job: job object to notify about
            email_from: send notification email upon completion from here
            email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    text.append('%20s %10s %10s' % (platform, status, host))
                    if status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            text.append('(%s, %s) : %s' %
                                        (host, test_status.test_name,
                                         test_status.reason))
                            text.append('')

        base_url = 'http://' + self.server

        params = ('columns=test',
                  'rows=machine_group',
                  "condition=tag~'%s-%%25'" % job.id,
                  'title=Report')
        query_string = '&'.join(params)
        url = '%s/tko/compose_query.cgi?%s' % (base_url, query_string)
        text.append(url + '\n')
        url = '%s/afe/#tab_id=view_job&object_id=%s' % (base_url, job.id)
        text.append(url + '\n')

        body = '\n'.join(text)
        print('---------------------------------------------------')
        # Two spaces: same output as the statement form "print a, b".
        print('Subject:  ' + subject)
        print(body)
        print('---------------------------------------------------')
        if email_from and email_to:
            print('Sending email ...')
            utils.send_email(email_from, email_to, subject, body)
        print('')


    def print_job_result(self, job):
        """
        Print the result of a single job.
            job: a job object
        """
        if job.result is None:
            label = 'PENDING'
        elif job.result == True:
            label = 'PASSED'
        elif job.result == False:
            label = 'FAILED'
        elif job.result == "Abort":
            label = 'ABORT'
        else:
            label = None
        summary = ' %s : %s' % (job.id, job.name)
        if label is not None:
            # The label used to be printed with a trailing comma, which put
            # one separating space in front of the summary.
            summary = label + ' ' + summary
        print(summary)


    def poll_all_jobs(self, tko, jobs, email_from=None, email_to=None):
        """
        Poll all jobs in a list.
            tko: TKO object used to fetch test results
            jobs: list of job objects to poll
            email_from: send notification email upon completion from here
            email_to: send notification email upon completion to here

        Returns:
            a) All complete successfully (return True)
            b) One or more has failed or aborted (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            # Only poll jobs without a recorded result; notify once, at the
            # moment a job's result first becomes known.
            if getattr(job, 'result', None) is None:
                job.result = self.poll_job_results(tko, job)
                if job.result is not None:
                    self.result_notify(job, email_from, email_to)

            results.append(job.result)
            self.print_job_result(job)

        if None in results:
            return None
        elif False in results or "Abort" in results:
            return False
        else:
            return True


    def _included_platform(self, host, platforms):
        """
        See if host's platforms matches any of the patterns in the included
        platforms list.  An empty list means no filtering (always True).
        """
        if not platforms:
            return True        # No filtering of platforms
        for platform in platforms:
            if re.search(platform, host.platform):
                return True
        return False


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium',
                    kernel_cmdline=None, **dargs):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        @param pairing: Pairing object describing the machine label, control
                file, platforms and scheduling mode to use.
        @param kernel: Passed to create_job_by_test().
        @param kernel_label: Label (string) of the kernel to run such as
                '<kernel-version> : <config> : <date>'
                If any pairing object has its job_label attribute set it
                will override this value for that particular job.
        @param priority: Job priority.
        @param kernel_cmdline: Passed to create_job_by_test().
        @param dargs: Extra job keyword arguments.

        @returns The new job object, or None if create_job_by_test() did not
                create one.
        """
        # The pairing can override the job label.
        if pairing.job_label:
            kernel_label = pairing.job_label
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        platforms = pairing.platforms
        hosts = [h for h in hosts if self._included_platform(h, platforms)]
        dead_statuses = self.host_statuses(live=False)
        host_list = [h.hostname for h in hosts if h.status not in dead_statuses]
        print('HOSTS: %s' % host_list)
        if pairing.atomic_group_sched:
            dargs['synch_count'] = pairing.synch_count
            dargs['atomic_group_name'] = pairing.machine_label
        else:
            dargs['hosts'] = host_list
        new_job = self.create_job_by_test(name=job_name,
                                          dependencies=[pairing.machine_label],
                                          tests=[pairing.control_file],
                                          priority=priority,
                                          kernel=kernel,
                                          kernel_cmdline=kernel_cmdline,
                                          use_container=pairing.container,
                                          **dargs)
        if new_job:
            if pairing.testname:
                new_job.testname = pairing.testname
            print('Invoked test %s : %s' % (new_job.id, job_name))
        return new_job


    def _job_test_results(self, tko, job, debug, tests=()):
        """
        Retrieve test results for a job and store them, keyed by hostname,
        in job.test_status (a dict of TestResults objects).

            tko: TKO object used for the query
            job: job object to fill in
            debug: print every test status that is recorded
            tests: if not empty, only test names listed here are recorded

        On an RPC failure the exception is printed and job.test_status is
        left empty.
        """
        job.test_status = {}
        try:
            test_statuses = tko.get_status_counts(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return

        for test_status in test_statuses:
            # SERVER_JOB is buggy, and often gives false failures. Ignore it.
            if test_status.test_name == 'SERVER_JOB':
                continue
            # if tests is not empty, restrict list of test_statuses to tests
            if tests and test_status.test_name not in tests:
                continue
            if debug:
                print(test_status)
            hostname = test_status.hostname
            if hostname not in job.test_status:
                job.test_status[hostname] = TestResults()
            job.test_status[hostname].add(test_status)


    def _job_results_platform_map(self, job, debug):
        """
        Figure out which hosts passed / failed / aborted in a job.

        Creates a 2-dimensional hash, stored as job.results_platform_map:
            1st index - platform type (string)
            2nd index - Status (string)
                        'Completed' / 'Failed' / 'Aborted', plus 'Total'
            Data indexed by this hash is a list of hostnames (text strings)

        Also fills job.job_status (hostname -> status) and
        job.metahost_index.  Expects job.test_status to be set already.
        On an RPC failure the exception is printed and the map stays empty.
        The debug argument is currently unused.
        """
        job.results_platform_map = {}
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return None

        platform_map = {}
        job.job_status = {}
        job.metahost_index = {}
        for job_status in job_statuses:
            # This is basically "for each host / metahost in the job"
            if job_status.host:
                hostname = job_status.host.hostname
            else:              # This is a metahost
                # Number unassigned metahost entries: '<metahost>.1', ...
                metahost = job_status.meta_host
                index = job.metahost_index.get(metahost, 1)
                job.metahost_index[metahost] = index + 1
                hostname = '%s.%s' % (metahost, index)
            job.job_status[hostname] = job_status.status
            status = job_status.status
            # Skip hosts that failed verify or repair:
            # that's a machine failure, not a job failure
            if hostname in job.test_status:
                verify_failed = False
                for failure in job.test_status[hostname].fail:
                    if (failure.test_name == 'verify' or
                            failure.test_name == 'repair'):
                        verify_failed = True
                        break
                if verify_failed:
                    continue
            if hostname in job.test_status and job.test_status[hostname].fail:
                # If the any tests failed in the job, we want to mark the
                # job result as failed, overriding the default job status.
                if status != "Aborted":         # except if it's an aborted job
                    status = 'Failed'
            if job_status.host:
                platform = job_status.host.platform
            else:              # This is a metahost
                platform = job_status.meta_host
            if platform not in platform_map:
                platform_map[platform] = {'Total' : [hostname]}
            else:
                platform_map[platform]['Total'].append(hostname)
            new_host_list = platform_map[platform].get(status, []) + [hostname]
            platform_map[platform][status] = new_host_list
        job.results_platform_map = platform_map


    def set_platform_results(self, test_job, platform, result):
        """
        Record the result for one platform of test_job, once.

        Result must be None, 'FAIL', 'WARN' or 'GOOD'
        """
        if test_job.platform_results[platform] is not None:
            # We're already done, and results recorded. This can't change later.
            return
        test_job.platform_results[platform] = result
        # Note that self.job refers to the metajob we're IN, not the job
        # that we're executing from here.
        testname = '%s.%s' % (test_job.testname, platform)
        if self.job:
            self.job.record(result, None, testname, status='')


    def poll_job_results(self, tko, job, enough=1, debug=False):
        """
        Analyse all job results by platform

          params:
            tko: a TKO object representing the results DB.
            job: the job to be examined.
            enough: the acceptable delta between the number of completed
                    tests and the total number of tests.
            debug: enable debugging output.

          returns:
            "Abort": if any platform has more than |enough| aborted hosts.
            False: otherwise, if any platform has at least half of its
                   hosts failed, or more than |enough| failed hosts.
            None: if there are no test results yet, or some platform is
                  still undecided.
            True: if every platform has at least |enough| completed hosts
                  and at most |enough| hosts not completed.
        """
        self._job_test_results(tko, job, debug)
        if job.test_status == {}:
            return None
        self._job_results_platform_map(job, debug)

        good_platforms = []
        failed_platforms = []
        aborted_platforms = []
        unknown_platforms = []
        platform_map = job.results_platform_map
        for platform in platform_map:
            if platform not in job.platform_results:
                # record test start, but there's no way to do this right now
                job.platform_results[platform] = None
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            aborted = len(platform_map[platform].get('Aborted', []))

            # We set up what we want to record here, but don't actually do
            # it yet, until we have a decisive answer for this platform
            if aborted or failed:
                bad = aborted + failed
                if (bad > 1) or (bad * 2 >= total):
                    platform_test_result = 'FAIL'
                else:
                    platform_test_result = 'WARN'

            if aborted > enough:
                aborted_platforms.append(platform)
                self.set_platform_results(job, platform, platform_test_result)
            elif (failed * 2 >= total) or (failed > enough):
                failed_platforms.append(platform)
                self.set_platform_results(job, platform, platform_test_result)
            elif (completed >= enough) and (completed + enough >= total):
                good_platforms.append(platform)
                self.set_platform_results(job, platform, 'GOOD')
            else:
                unknown_platforms.append(platform)
            detail = []
            for status in platform_map[platform]:
                if status == 'Total':
                    continue
                detail.append('%s=%s' % (status,
                                         platform_map[platform][status]))
            if debug:
                print('%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail)))
                print('')

        # The double spaces after the colons below reproduce the output of
        # the statement form "print a, b" these messages were written with.
        if len(aborted_platforms) > 0:
            if debug:
                print('Result aborted - platforms:  ' +
                      ' '.join(aborted_platforms))
            return "Abort"
        if len(failed_platforms) > 0:
            if debug:
                print('Result bad - platforms: ' + ' '.join(failed_platforms))
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print('Result unknown - platforms:  ' + platform_list)
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print('Result good - all platforms passed:  ' + platform_list)
        return True
697
698
class TestResults(object):
    """
    Buckets the test results of a job into three lists:

    good:    results that are fully complete with no failures
    fail:    results where more tests completed than passed
    pending: results that still have incomplete tests (and no failures)
    """
    def __init__(self):
        self.good, self.fail, self.pending = [], [], []


    def add(self, result):
        """
        File a result object into exactly one of the three buckets.

        result: object exposing complete_count, pass_count and
                incomplete_count attributes.
        """
        # A failure takes precedence over "still running".
        if result.complete_count > result.pass_count:
            bucket = self.fail
        elif result.incomplete_count > 0:
            bucket = self.pending
        else:
            bucket = self.good
        bucket.append(result)
mbligh5280e3b2008-12-22 14:39:28 +0000716
717
class RpcObject(object):
    """
    Base class that turns the dictionary returned by an rpc call into a
    python object whose attributes are the dictionary's keys.
    """
    def __init__(self, afe, hash):
        """
        afe:  the frontend handle this object was fetched through
        hash: dictionary of field name => value from the rpc call
        """
        self.afe = afe
        self.hash = hash
        # Applied last, so a key called 'afe' or 'hash' in the dictionary
        # overrides the two attributes set above.
        vars(self).update(hash)


    def __str__(self):
        header = self.__repr__()
        return dump_object(header, self)
730
731
class ControlFile(RpcObject):
    """
    AFE control file object

    Fields:
        synch_count, dependencies, control_file, is_server
    """
    def __repr__(self):
        contents = self.control_file
        return 'CONTROL FILE: %s' % contents
740
741
class Label(RpcObject):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        label_name = self.name
        return 'LABEL: %s' % label_name


    def add_hosts(self, hosts):
        """
        Issue the 'label_add_hosts' rpc for this label's id and the given
        hosts, returning whatever the rpc returns.
        """
        rpc_args = dict(id=self.id, hosts=hosts)
        return self.afe.run('label_add_hosts', **rpc_args)


    def remove_hosts(self, hosts):
        """
        Issue the 'label_remove_hosts' rpc for this label's id and the given
        hosts, returning whatever the rpc returns.
        """
        rpc_args = dict(id=self.id, hosts=hosts)
        return self.afe.run('label_remove_hosts', **rpc_args)
mbligh67647152008-11-19 00:18:14 +0000759
760
class Acl(RpcObject):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Log the change, then issue the 'acl_group_add_hosts' rpc for this
        ACL, returning whatever the rpc returns.
        """
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        # Arguments are passed by keyword, like every other afe.run() call
        # in this file (Host.add_acl uses the same rpc with id=/hosts=).
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Log the change, then issue the 'acl_group_remove_hosts' rpc for this
        ACL, returning whatever the rpc returns.
        """
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)


    def add_users(self, users):
        """
        Log the change, then issue the 'acl_group_add_users' rpc for this
        ACL, returning whatever the rpc returns.
        """
        self.afe.log('Adding users %s to ACL %s' % (users, self.name))
        # NOTE(review): this passes the ACL *name* as id, whereas the host
        # methods above pass self.id. Host.add_acl also passes a name, so
        # presumably the rpc accepts either - confirm.
        return self.afe.run('acl_group_add_users', id=self.name, users=users)
785
786
class Job(RpcObject):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        job_id = self.id
        return 'JOB: %s' % job_id
798
799
class JobStatus(RpcObject):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id

    After construction the raw 'job' entry is wrapped in a Job object and a
    non-empty 'host' entry is wrapped in a Host object.
    """
    def __init__(self, afe, hash):
        """
        afe:  the frontend handle this object was fetched through
        hash: dictionary of field name => value from the rpc call; must
              contain a 'job' entry, 'host' may be missing or empty.
        """
        super(JobStatus, self).__init__(afe, hash)
        self.job = Job(afe, self.job)
        # getattr() needs an explicit default: without one it raises
        # AttributeError for a missing 'host' instead of skipping the wrap.
        host = getattr(self, 'host', None)
        if host:
            host = Host(afe, host)
        # Always define self.host so __repr__ can test it safely.
        self.host = host


    def __repr__(self):
        # Entries with no host (or no hostname) are shown as 'None'.
        if self.host and self.host.hostname:
            hostname = self.host.hostname
        else:
            hostname = 'None'
        return 'JOB STATUS: %s-%s' % (self.job.id, hostname)
mbligh67647152008-11-19 00:18:14 +0000823
824
class Host(RpcObject):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        name = self.hostname
        return 'HOST OBJECT: %s' % name


    def show(self):
        """
        Print a one line summary of the host: hostname, status, locked,
        platform and then every label other than the platform label.
        """
        other_labels = list(set(self.labels) - set([self.platform]))
        row = (self.hostname, self.status, self.locked, self.platform,
               ', '.join(other_labels))
        # A single parenthesised argument prints exactly as the bare
        # print statement would.
        print('%-6s %-7s %-7s %-16s %s' % row)


    def delete(self):
        """
        Issue the 'delete_host' rpc for this host's id.
        """
        rpc_args = dict(id=self.id)
        return self.afe.run('delete_host', **rpc_args)


    def modify(self, **dargs):
        """
        Issue the 'modify_host' rpc for this host's id, forwarding any
        keyword arguments unchanged.
        """
        return self.afe.run('modify_host', id=self.id, **dargs)


    def get_acls(self):
        """
        Return the frontend's get_acls() result filtered on this hostname.
        """
        query = dict(hosts__hostname=self.hostname)
        return self.afe.get_acls(**query)


    def add_acl(self, acl_name):
        """
        Log the change, then issue 'acl_group_add_hosts' with acl_name as
        the id and this host as the only host.
        """
        message = 'Adding ACL %s to host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        return self.afe.run('acl_group_add_hosts', id=acl_name,
                            hosts=[self.hostname])


    def remove_acl(self, acl_name):
        """
        Log the change, then issue 'acl_group_remove_hosts' with acl_name as
        the id and this host as the only host.
        """
        message = 'Removing ACL %s from host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=[self.hostname])


    def get_labels(self):
        """
        Return the frontend's get_labels() result filtered on this hostname.
        """
        query = dict(host__hostname__in=[self.hostname])
        return self.afe.get_labels(**query)


    def add_labels(self, labels):
        """
        Log the change, then issue 'host_add_labels' for this host's id.
        """
        message = 'Adding labels %s to host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        """
        Log the change, then issue 'host_remove_labels' for this host's id.
        """
        message = 'Removing labels %s from host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)
mbligh5b618382008-12-03 15:24:01 +0000880
881
class User(RpcObject):
    """
    Frontend user object (presumably an AFE user - confirm)

    Fields used here:
        login
    """
    def __repr__(self):
        login_name = self.login
        return 'USER: %s' % login_name
885
886
class TestStatus(RpcObject):
    """
    TKO test status object

    Fields:
        test_idx, hostname, testname, id
        complete_count, incomplete_count, group_count, pass_count
    """
    def __repr__(self):
        status_id = self.id
        return 'TEST STATUS: %s' % status_id
897
898
class MachineTestPairing(object):
    """
    Object representing the pairing of a machine label with a control file

    machine_label: use machines from this label
    control_file: use this control file (by name in the frontend)
    platforms: list of regexps to filter platforms by. [] => no filtering
               (the default, None, is stored as a fresh empty list)
    job_label: The label (name) to give to the autotest job launched
            to run this pairing.  '<kernel-version> : <config> : <date>'

    container, atomic_group_sched, synch_count and testname are stored on
    the instance unchanged; their meaning is defined by the code that
    consumes the pairing.
    """
    def __init__(self, machine_label, control_file, platforms=None,
                 container=False, atomic_group_sched=False, synch_count=0,
                 testname=None, job_label=None):
        self.machine_label = machine_label
        self.control_file = control_file
        # A literal [] default would be one list shared by every instance
        # built without platforms, so mutating it on one pairing would
        # leak into all the others.
        if platforms is None:
            platforms = []
        self.platforms = platforms
        self.container = container
        self.atomic_group_sched = atomic_group_sched
        self.synch_count = synch_count
        self.testname = testname
        self.job_label = job_label


    def __repr__(self):
        return '%s %s %s %s' % (self.machine_label, self.control_file,
                                self.platforms, self.container)