blob: 49112cd23b8cce587cb160e21e4bf24b0f453404 [file] [log] [blame]
mbligh67647152008-11-19 00:18:14 +00001# Copyright Martin J. Bligh, Google Inc 2008
2# Released under the GPL v2
3
4"""
This module allows you to communicate with the frontend to submit jobs etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries into real objects that are more idiomatic.

For docs, see:
    http://autotest/afe/server/noauth/rpc/
    http://autotest/new_tko/server/noauth/rpc/
    http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
mbligh67647152008-11-19 00:18:14 +000015"""
16
mbligh5b618382008-12-03 15:24:01 +000017import os, time, traceback
mbligh67647152008-11-19 00:18:14 +000018import common
19from autotest_lib.frontend.afe import rpc_client_lib
mbligh37eceaa2008-12-15 22:56:37 +000020from autotest_lib.client.common_lib import global_config
mbligh67647152008-11-19 00:18:14 +000021from autotest_lib.client.common_lib import utils
22
mbligh37eceaa2008-12-15 22:56:37 +000023GLOBAL_CONFIG = global_config.global_config
24DEFAULT_SERVER = 'autotest'
25
mbligh67647152008-11-19 00:18:14 +000026
def dump_object(header, obj):
    """
    Render a frontend object (eg job, host, acl, label) as human-readable
    text for debugging.

    header: first line of the output
    obj: any object carrying a 'hash' dictionary of its fields

    Returns the header followed by one right-aligned 'key: value' line per
    entry of obj.hash. The bookkeeping keys 'afe' and 'hash' are left out.
    """
    lines = [header + '\n']
    for field, value in obj.hash.items():
        if field in ('afe', 'hash'):
            continue
        lines.append('%20s: %s\n' % (field, value))
    return ''.join(lines)
38
39
class RpcClient(object):
    """
    Base class for communicating with an autotest RPC frontend.

    Subclasses supply the RPC path of the service they talk to; this class
    builds the proxy and provides the generic call and logging helpers.
    """
    def __init__(self, path, user, web_server, print_log, debug):
        """
        Create a connection proxy to an RPC service.

        path: path of the RPC service, appended to web_server
        user: username to connect as (falls back to $LOGNAME when empty)
        web_server: server base URL to connect to (falls back to
                    'http://' + the SERVER/hostname global config value)
        print_log: print a logging message to stdout on every operation
        debug: print out all RPC traffic
        """
        user = user or os.environ.get('LOGNAME')
        if web_server:
            base_url = web_server
        else:
            base_url = 'http://' + GLOBAL_CONFIG.get_config_value(
                    'SERVER', 'hostname', default=DEFAULT_SERVER)
        self.user = user
        self.print_log = print_log
        self.debug = debug
        self.proxy = rpc_client_lib.get_proxy(
                base_url + path, headers={'AUTHORIZATION' : self.user})


    def run(self, call, **dargs):
        """
        Make an RPC call to the server.

        call: name of the remote method
        dargs: keyword arguments passed through to the remote method

        Returns the reply after passing it through utils.strip_unicode.
        """
        # Resolve the remote method first so a bad name fails before any
        # debug output is produced.
        remote_method = getattr(self.proxy, call)
        if self.debug:
            print('DEBUG: %s %s' % (call, dargs))
        reply = remote_method(**dargs)
        return utils.strip_unicode(reply)


    def log(self, message):
        """
        Print message to stdout, but only when print_log is enabled.
        """
        if not self.print_log:
            return
        print(message)
82
83
class TKO(RpcClient):
    """
    RPC client for the TKO (test results) frontend.
    """
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        """
        Connect to the TKO RPC service; see RpcClient for the arguments.
        """
        super(TKO, self).__init__('/new_tko/server/noauth/rpc/', user,
                                  web_server, print_log, debug)


    def get_status_counts(self, job, **data):
        """
        Fetch status counts for a job, grouped by hostname and test name.

        job: job identifier; results whose job tag starts with '<job>-'
             are selected
        data: extra filter arguments passed through to the RPC

        Returns a list of TestStatus objects, one per returned group.
        """
        reply = self.run('get_status_counts',
                         group_by=['hostname', 'test_name'],
                         job_tag__startswith='%s-' % job, **data)
        statuses = []
        for group in reply['groups']:
            statuses.append(TestStatus(self, group))
        return statuses
mblighc31e4022008-12-11 19:32:30 +000095
96
class AFE(RpcClient):
    """
    RPC client for the AFE (job scheduling) frontend.

    Constructors for the frontend objects (hosts, labels, acls, jobs) live
    here; methods that manipulate an object live on the object itself.
    """
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        """
        Connect to the AFE RPC service; see RpcClient for the arguments.
        """
        super(AFE, self).__init__('/afe/server/noauth/rpc/', user, web_server,
                                  print_log, debug)


    def host_statuses(self, live=None):
        """
        Return the list of host status names.

        live: True  - only statuses that are not considered dead
              False - only the dead statuses
              None  - every status the server knows about
        """
        dead_statuses = ['Dead', 'Repair Failed']
        statuses = self.run('get_static_data')['host_statuses']
        # '==' rather than 'is' is kept deliberately so that callers passing
        # 1 / 0 keep working as before.
        if live == True:
            return list(set(statuses) - set(dead_statuses))
        if live == False:
            return dead_statuses
        return statuses


    def get_hosts(self, **dargs):
        """
        Return a list of Host objects matching the filter arguments.
        """
        hosts = self.run('get_hosts', **dargs)
        return [Host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        """
        Create a host on the server and return it as a Host object.

        hostname: name of the new host
        dargs: extra arguments passed through to the 'add_host' RPC
        """
        # The hostname has to be forwarded; without it the RPC is issued
        # with no host to create.
        host_id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=host_id)[0]


    def get_labels(self, **dargs):
        """
        Return a list of Label objects matching the filter arguments.
        """
        labels = self.run('get_labels', **dargs)
        return [Label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        """
        Create a label on the server and return it as a Label object.

        name: name of the new label
        dargs: extra arguments passed through to the 'add_label' RPC
        """
        label_id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=label_id)[0]


    def get_acls(self, **dargs):
        """
        Return a list of Acl objects matching the filter arguments.
        """
        acls = self.run('get_acl_groups', **dargs)
        return [Acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        """
        Create an ACL group on the server and return it as an Acl object.

        name: name of the new ACL group
        dargs: extra arguments passed through to the 'add_acl_group' RPC
        """
        acl_id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=acl_id)[0]


    def get_jobs(self, summary=False, **dargs):
        """
        Return a list of Job objects matching the filter arguments.

        summary: use the 'get_jobs_summary' RPC instead of 'get_jobs'
        """
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [Job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        """
        Return a list of JobStatus objects matching the filter arguments.
        """
        entries = self.run('get_host_queue_entries', **data)
        return [JobStatus(self, e) for e in entries]


    def create_job_by_test(self, tests, kernel=None, **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it.

        tests: list of test names to generate the control file from
        kernel: kernel to pass to the control file generator
        dargs: extra arguments for create_job; 'dependencies', if given, is
               extended with the dependencies the server reports

        Returns the new Job object.
        """
        results = self.run('generate_control_file', tests=tests, kernel=kernel,
                           use_container=False, do_push_packages=True)
        if results['is_server']:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = dargs.get('dependencies', []) + \
                                results['dependencies']
        dargs['control_file'] = results['control_file']
        dargs['synch_count'] = results['synch_count']
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        """
        Submit a job and return it as a Job object.

        control_file: contents of the control file to run
        name: job name
        priority: job priority
        control_type: 'Client' or 'Server'
        dargs: extra arguments passed through to the 'create_job' RPC
        """
        job_id = self.run('create_job', name=name, priority=priority,
                          control_file=control_file,
                          control_type=control_type, **dargs)
        return self.get_jobs(id=job_id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label, wait=True,
                        poll_interval=5, email_from=None, email_to=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                      (<kernel-version> : <config> : <date>)
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns None immediately when wait is false, otherwise True if all
        jobs passed and False if any failed.
        """
        jobs = []
        for pairing in pairings:
            job = self.invoke_test(pairing, kernel, kernel_label)
            # Tracks whether result_notify has already fired for this job.
            job.notified = False
            jobs.append(job)
        if not wait:
            return
        tko = TKO()
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(tko, jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print, if email data
        is provided, will send email for it as well.

        job: job object to notify about
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform, by_status in job.results_platform_map.items():
            for status, host_names in by_status.items():
                # 'Total' is the bookkeeping list of every host, not a status
                if status == 'Total':
                    continue
                hosts = ','.join(host_names)
                text.append('%20s %10s %s\n' % (platform, status, hosts))

        tko_base_url = 'http://%s/tko' % GLOBAL_CONFIG.get_config_value(
                'SERVER', 'hostname', default=DEFAULT_SERVER)

        params = ('columns=test',
                  'rows=machine_group',
                  "condition=tag~'%s-%%25'" % job.id,
                  'title=Report')
        query_string = '&'.join(params)
        url = '%s/compose_query.cgi?%s' % (tko_base_url, query_string)
        text.append('\n')
        text.append(url)

        body = '\n'.join(text)
        print('---------------------------------------------------')
        print(' '.join(['Subject: ', subject]))
        print(body)
        print('---------------------------------------------------')
        if email_from and email_to:
            print('Sending email ...')
            utils.send_email(email_from, email_to, subject, body)
        print('')


    def poll_all_jobs(self, tko, jobs, email_from, email_to):
        """
        Poll all jobs in a list.

        tko: TKO client used to fetch test results
        jobs: list of job objects to poll
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns:
            a) All complete successfully (return True)
            b) One or more has failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            job.result = self.poll_job_results(tko, job, debug=False)
            results.append(job.result)
            if job.result is not None and not job.notified:
                self.result_notify(job, email_from, email_to)
                job.notified = True

            if job.result is None:
                state = 'PENDING'
            elif job.result == True:
                state = 'PASSED'
            else:
                state = 'FAILED'
            print(' '.join([state, ' %s : %s' % (job.id, job.name)]))

        # A pending job outranks a failed one: keep polling until every job
        # has a definite result.
        if None in results:
            return None
        elif False in results:
            return False
        else:
            return True


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Hosts whose status is 'Repair Failed' are left out.

        Returns a job object
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        new_job = self.create_job_by_test(name=job_name,
                                          dependencies=[pairing.machine_label],
                                          tests=[pairing.control_file],
                                          priority=priority,
                                          hosts=host_list,
                                          kernel=kernel)
        print('Invoked test %s : %s' % (new_job.id, job_name))
        return new_job


    def _job_test_results(self, tko, job):
        """
        Retrieve test results for a job.

        Fills job.test_status with a TestResults object per hostname.

        Returns True when the results were fetched, False when the RPC
        failed (job.test_status is then left empty).
        """
        job.test_status = {}
        try:
            test_statuses = tko.get_status_counts(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return False

        for test_status in test_statuses:
            hostname = test_status.hostname
            if hostname not in job.test_status:
                job.test_status[hostname] = TestResults()
            job.test_status[hostname].add(test_status)
        return True


    def _job_results_platform_map(self, job):
        """
        Build job.results_platform_map: platform -> status -> [hostnames],
        plus a 'Total' entry per platform listing every host.

        Also records the raw per-host status in job.job_status. A host with
        failed tests in job.test_status is reported as 'Failed' whatever its
        job status says.

        Returns True when the map was built, False when the RPC failed
        (job.results_platform_map is then left empty).
        """
        job.results_platform_map = {}
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return False

        platform_map = {}
        job.job_status = {}
        for job_status in job_statuses:
            hostname = job_status.host.hostname
            job.job_status[hostname] = job_status.status
            status = job_status.status
            if hostname in job.test_status and job.test_status[hostname].fail:
                # Job status doesn't reflect failed tests, override that
                status = 'Failed'
            platform = job_status.host.platform
            by_status = platform_map.setdefault(platform, {'Total' : []})
            by_status['Total'].append(hostname)
            by_status.setdefault(status, []).append(hostname)
        job.results_platform_map = platform_map
        return True


    def poll_job_results(self, tko, job, debug=False):
        """
        Analyse all job results by platform, return:

            False: if any platform has more than one failure
            None:  if any platform has more than one machine not yet Good,
                   or if the results could not be fetched this time round.
            True:  if all platforms have at least all-but-one machines Good.
        """
        got_tests = self._job_test_results(tko, job)
        got_platforms = self._job_results_platform_map(job)
        if not (got_tests and got_platforms):
            # An RPC failure leaves the maps empty; an empty map has no bad
            # or unknown platforms and would otherwise be reported as a
            # pass. Report "cannot tell yet" so the caller polls again.
            return None

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        platform_map = job.results_platform_map
        for platform in platform_map:
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif completed + 1 >= total:
                # if all or all but one are good, call the job good.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            detail = []
            for status in platform_map[platform]:
                if status == 'Total':
                    continue
                detail.append('%s=%s' % (status,platform_map[platform][status]))
            if debug:
                print('%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail)))
                print('')

        if len(bad_platforms) > 0:
            if debug:
                print('Result bad - platforms: ' + ' '.join(bad_platforms))
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print(' '.join(['Result unknown - platforms: ', platform_list]))
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print(' '.join(['Result good - all platforms passed: ',
                            platform_list]))
        return True
414
415
class TestResults(object):
    """
    Accumulates the names of the passing and failing tests of a job.

    good: names of tests with no failed runs
    fail: names of tests with at least one completed run that did not pass
    """
    def __init__(self):
        self.good, self.fail = [], []


    def add(self, result):
        """
        File result.test_name under 'fail' when more runs completed than
        passed, otherwise under 'good'.
        """
        if result.complete_count > result.pass_count:
            bucket = self.fail
        else:
            bucket = self.good
        bucket.append(result.test_name)
430
431
class RpcObject(object):
    """
    Generic wrapper that turns the dictionary returned by an rpc call into
    a python object with one attribute per dictionary key.
    """
    def __init__(self, afe, hash):
        """
        afe: client the object came from, used for follow-up RPC calls
        hash: dictionary of fields as returned by the RPC
        """
        self.afe, self.hash = afe, hash
        # Applied last, so fields from the RPC win over the two attributes
        # set above should the names ever collide.
        vars(self).update(hash)


    def __str__(self):
        return dump_object(repr(self), self)
444
445
class Label(RpcObject):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Attach this label to the given hosts.

        hosts: list of hosts to label
        """
        # run() only accepts keyword arguments after the call name; passing
        # the id and hosts positionally raised TypeError.
        return self.afe.run('label_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Detach this label from the given hosts.

        hosts: list of hosts to remove the label from
        """
        return self.afe.run('label_remove_hosts', id=self.id, hosts=hosts)
463
464
class Acl(RpcObject):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Add the given hosts to this ACL group.

        hosts: list of hosts to add
        """
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        # run() only accepts keyword arguments after the call name; passing
        # the id and hosts positionally raised TypeError.
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Remove the given hosts from this ACL group.

        hosts: list of hosts to remove
        """
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)
484
485
class Job(RpcObject):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        """
        Identify the job by its id.
        """
        return 'JOB: %s' % self.id
497
498
class JobStatus(RpcObject):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id

    The 'job' field is wrapped in a Job object. The 'host' field, when set,
    is replaced by the Host object looked up by hostname.
    """
    def __init__(self, afe, hash):
        """
        afe: AFE client, also used to look the host up
        hash: dictionary of fields as returned by the RPC
        """
        super(JobStatus, self).__init__(afe, hash)
        self.job = Job(afe, self.job)
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        # An entry may have no host; don't blow up while printing it.
        if self.host:
            hostname = self.host.hostname
        else:
            hostname = None
        return 'JOB STATUS: %s-%s' % (self.job.id, hostname)
518
519
class Host(RpcObject):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        return 'HOST OBJECT: %s' % self.hostname


    def show(self):
        """
        Print a one-line summary: hostname, status, locked flag, platform
        and the remaining labels (the platform label itself is left out).
        """
        other_labels = list(set(self.labels) - set([self.platform]))
        summary = (self.hostname, self.status, self.locked, self.platform,
                   ', '.join(other_labels))
        print('%-6s %-7s %-7s %-16s %s' % summary)


    def get_acls(self):
        """
        Return the Acl objects this host belongs to.
        """
        return self.afe.get_acls(hosts__hostname=self.hostname)


    def add_acl(self, acl_name):
        """
        Add this host to the ACL group identified by acl_name.
        """
        message = 'Adding ACL %s to host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        return self.afe.run('acl_group_add_hosts', id=acl_name,
                            hosts=[self.hostname])


    def remove_acl(self, acl_name):
        """
        Remove this host from the ACL group identified by acl_name.
        """
        message = 'Removing ACL %s from host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=[self.hostname])


    def get_labels(self):
        """
        Return the Label objects attached to this host.
        """
        return self.afe.get_labels(host__hostname__in=[self.hostname])


    def add_labels(self, labels):
        """
        Attach the given labels to this host.
        """
        message = 'Adding labels %s to host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        """
        Detach the given labels from this host.
        """
        message = 'Removing labels %s from host %s' % (labels,self.hostname)
        self.afe.log(message)
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)
mbligh5b618382008-12-03 15:24:01 +0000567
568
class TestStatus(RpcObject):
    """
    TKO test status object

    Fields:
        test_idx, hostname, test_name, id
        complete_count, incomplete_count, group_count, pass_count
    """
    def __repr__(self):
        """
        Identify the test status by its id.
        """
        return 'TEST STATUS: %s' % self.id
579
580
class MachineTestPairing(object):
    """
    Pairs a machine label with the control file to run on the machines
    carrying that label.

    machine_label: label selecting the machines
    control_file: control file (test) to submit to them
    """
    def __init__(self, machine_label, control_file):
        self.machine_label, self.control_file = machine_label, control_file