blob: 73e88737f2f42aa008f532208d8571c470b15c64 [file] [log] [blame]
# Copyright Martin J. Bligh, Google Inc 2008
# Released under the GPL v2
3
4"""
This class allows you to communicate with the frontend to submit jobs etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries into real objects that are more idiomatic

For docs, see:
    http://autotest/afe/server/noauth/rpc/
    http://autotest/new_tko/server/noauth/rpc/
    http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
mbligh67647152008-11-19 00:18:14 +000015"""
16
mbligh5b618382008-12-03 15:24:01 +000017import os, time, traceback
mbligh67647152008-11-19 00:18:14 +000018import common
19from autotest_lib.frontend.afe import rpc_client_lib
mbligh37eceaa2008-12-15 22:56:37 +000020from autotest_lib.client.common_lib import global_config
mbligh67647152008-11-19 00:18:14 +000021from autotest_lib.client.common_lib import utils
22
# Hostname used as the default when the SERVER/hostname config value is
# looked up below.
DEFAULT_SERVER = 'autotest'
# Module-level shorthand for the global_config object exposed by the
# global_config module.
GLOBAL_CONFIG = global_config.global_config
25
mbligh67647152008-11-19 00:18:14 +000026
def dump_object(header, obj):
    """
    Standard way to print out the frontend objects (eg job, host, acl, label)
    in a human-readable fashion for debugging

    header: first line of the output
    obj: object whose 'hash' attribute is a dictionary of field -> value

    Returns the header followed by one right-aligned "key: value" line per
    entry of obj.hash; entries keyed 'afe' or 'hash' are left out.
    """
    lines = [header + '\n']
    for key, value in obj.hash.items():
        if key in ('afe', 'hash'):
            continue
        lines.append('%20s: %s\n' % (key, value))
    # Join once instead of growing a string with += on every field.
    return ''.join(lines)
38
39
class rpc_client(object):
    """
    Base class for communicating with an autotest RPC frontend.

    Holds the proxy used for the calls plus the logging/debug switches;
    the afe and tko subclasses supply the endpoint path.
    """
    def __init__(self, path, user, web_server, print_log, debug):
        """
        Build the RPC proxy for one endpoint.

        path: URL path of the RPC endpoint, appended to web_server
        user: username to connect as; $LOGNAME is used when this is empty
        web_server: base URL of the server to connect to; when empty it is
                    built from the SERVER/hostname global config value
        print_log: print a logging message to stdout on every operation
        debug: print each RPC call name and its arguments before issuing it
        """
        user = user or os.environ.get('LOGNAME')
        if not web_server:
            hostname = GLOBAL_CONFIG.get_config_value(
                    'SERVER', 'hostname', default=DEFAULT_SERVER)
            web_server = 'http://' + hostname
        self.user = user
        self.print_log = print_log
        self.debug = debug
        self.proxy = rpc_client_lib.get_proxy(
                web_server + path, headers={'AUTHORIZATION' : self.user})


    def run(self, call, **dargs):
        """
        Issue the RPC named by call with dargs as its keyword arguments and
        return the reply after passing it through utils.strip_unicode().
        """
        method = getattr(self.proxy, call)
        if self.debug:
            print('DEBUG: %s %s' % (call, dargs))
        reply = method(**dargs)
        return utils.strip_unicode(reply)


    def log(self, message):
        """
        Print message, but only when print_log was requested.
        """
        if not self.print_log:
            return
        print(message)
82
83
class tko(rpc_client):
    """
    Client for the results (new_tko) RPC endpoint.
    """
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        endpoint = '/new_tko/server/noauth/rpc/'
        super(tko, self).__init__(endpoint, user, web_server, print_log,
                                  debug)


    def get_status_counts(self, job, **data):
        """
        Fetch status counts grouped by hostname and test name for the
        entries whose job tag starts with "<job>-".

        job: value formatted into the job tag prefix
        data: extra keyword filters passed through to the RPC

        Returns a list of test_status objects, one per returned group.
        """
        tag_prefix = '%s-' % job
        reply = self.run('get_status_counts',
                         group_by=['hostname', 'test_name'],
                         job_tag__startswith=tag_prefix, **data)
        return [test_status(self, group) for group in reply['groups']]
95
96
class afe(rpc_client):
    """
    Client for the AFE RPC endpoint.

    The get_*/create_* methods issue RPCs through run() and wrap the
    returned dictionaries in the matching object class from this module
    (host, label, acl, job, job_status).
    """
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        super(afe, self).__init__('/afe/server/noauth/rpc/', user, web_server,
                                  print_log, debug)


    def host_statuses(self, live=None):
        """
        Return a list of host status names.

        live: None  - every status the server reports
              true  - the reported statuses except the dead ones, in the
                      order the server reports them
              false - only the dead statuses ('Dead', 'Repair Failed')
        """
        dead_statuses = ['Dead', 'Repair Failed']
        if live is not None and not live:
            # The dead list is fixed, so no RPC is needed to answer this.
            return dead_statuses
        statuses = self.run('get_static_data')['host_statuses']
        if live is None:
            return statuses
        return [s for s in statuses if s not in dead_statuses]


    def get_hosts(self, **dargs):
        """
        Return host objects for the hosts matching the dargs filters.
        """
        hosts = self.run('get_hosts', **dargs)
        return [host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        """
        Add a host called hostname and return it as a host object.
        """
        # hostname has to be forwarded explicitly; it is not part of dargs.
        host_id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=host_id)[0]


    def get_labels(self, **dargs):
        """
        Return label objects for the labels matching the dargs filters.
        """
        labels = self.run('get_labels', **dargs)
        return [label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        """
        Add a label called name and return it as a label object.
        """
        # name has to be forwarded explicitly; it is not part of dargs.
        label_id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=label_id)[0]


    def get_acls(self, **dargs):
        """
        Return acl objects for the ACL groups matching the dargs filters.
        """
        acls = self.run('get_acl_groups', **dargs)
        return [acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        """
        Add an ACL group called name and return it as an acl object.
        """
        # name has to be forwarded explicitly; it is not part of dargs.
        acl_id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=acl_id)[0]


    def get_jobs(self, summary=False, **dargs):
        """
        Return job objects for the jobs matching the dargs filters.

        summary: use the get_jobs_summary RPC instead of get_jobs
        """
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        """
        Return job_status objects for the queue entries matching the data
        filters.
        """
        entries = self.run('get_host_queue_entries', **data)
        return [job_status(self, e) for e in entries]


    def create_job_by_test(self, tests, kernel=None, **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it

        tests: list of test names handed to generate_control_file
        kernel: kernel handed to generate_control_file
        dargs: remaining create_job() arguments; any 'dependencies' given
               here are extended with the ones the server reports

        Returns the new job object.
        """
        results = self.run('generate_control_file', tests=tests, kernel=kernel,
                           use_container=False, do_push_packages=True)
        if results['is_server']:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = (dargs.get('dependencies', []) +
                                 results['dependencies'])
        dargs['control_file'] = results['control_file']
        dargs['synch_count'] = results['synch_count']
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        """
        Submit a job through the create_job RPC and return it as a job
        object looked up by the id the RPC returned.
        """
        job_id = self.run('create_job', name=name, priority=priority,
                          control_file=control_file,
                          control_type=control_type, **dargs)
        return self.get_jobs(id=job_id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label, wait=True,
                        poll_interval=5, email_from=None, email_to=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                      (kernel-version : config : date)
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns None straight away when wait is false, otherwise the first
        result from poll_all_jobs() that is not None.
        """
        jobs = []
        for pairing in pairings:
            new_job = self.invoke_test(pairing, kernel, kernel_label)
            # poll_all_jobs() uses this flag to notify once per job.
            new_job.notified = False
            jobs.append(new_job)
            if email_from and email_to:
                subject = 'Testing started: %s : %s' % (new_job.name,
                                                        new_job.id)
                utils.send_email(email_from, email_to, subject, subject)
        if not wait:
            return
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print, if email data
        is provided, will send email for it as well.

        job: job object to notify about; its result and
             results_platform_map attributes must already be set
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                hosts = ','.join(job.results_platform_map[platform][status])
                text.append('%20s %10s %s\n' % (platform, status, hosts))

        tko_base_url = 'http://%s/tko' % GLOBAL_CONFIG.get_config_value(
                'SERVER', 'hostname', default=DEFAULT_SERVER)

        params = ('columns=test',
                  'rows=machine_group',
                  "condition=tag~'%s-%%25'" % job.id,
                  'title=Report')
        query_string = '&'.join(params)
        url = '%s/compose_query.cgi?%s' % (tko_base_url, query_string)
        text.append('\n')
        text.append(url)

        body = '\n'.join(text)
        print('---------------------------------------------------')
        print('Subject:  %s' % subject)
        print(body)
        print('---------------------------------------------------')
        if email_from and email_to:
            print('Sending email ...')
            utils.send_email(email_from, email_to, subject, body)
        # Blank separator line after the report.
        print('')


    def poll_all_jobs(self, jobs, email_from, email_to):
        """
        Poll all jobs in a list.
        jobs: list of job objects to poll
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns:
            a) All complete successfully (return True)
            b) One or more has failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for polled in jobs:
            polled.result = self.poll_job_results(polled, debug=False)
            results.append(polled.result)
            if polled.result is not None and not polled.notified:
                self.result_notify(polled, email_from, email_to)
                polled.notified = True

            if polled.result is None:
                state = 'PENDING'
            elif polled.result:
                state = 'PASSED'
            else:
                state = 'FAILED'
            print('%s  %s : %s' % (state, polled.id, polled.name))

        # Anything still pending outranks a failure: keep polling.
        if None in results:
            return None
        if False in results:
            return False
        return True


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Hosts whose status is 'Repair Failed' are left out of the job.

        Returns a job object
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        new_job = self.create_job_by_test(name=job_name,
                                          dependencies=[pairing.machine_label],
                                          tests=[pairing.control_file],
                                          priority=priority,
                                          hosts=host_list,
                                          kernel=kernel)
        print('Invoked test %s : %s' % (new_job.id, job_name))
        return new_job


    def poll_job_results(self, job, debug=False):
        """
        Analyse all job results by platform, return:

            False: if any platform has more than one failure
            None:  if any platform has more than one machine not yet Good.
            True:  if all platforms have at least all-but-one machines Good.

        A job with no queue entries yields True. If fetching the queue
        entries raises, the traceback is printed and None is returned.

        As a side effect job.results_platform_map is set to a dictionary
        platform -> {status or 'Total' -> list of hostnames}.
        """
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            # Deliberately best-effort: the caller simply polls again.
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return None

        platform_map = {}
        for entry in job_statuses:
            hostname = entry.host.hostname
            by_status = platform_map.setdefault(entry.host.platform,
                                                {'Total' : []})
            by_status['Total'].append(hostname)
            by_status[entry.status] = (by_status.get(entry.status, []) +
                                       [hostname])
        job.results_platform_map = platform_map

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        for platform in platform_map:
            by_status = platform_map[platform]
            total = len(by_status['Total'])
            completed = len(by_status.get('Completed', []))
            failed = len(by_status.get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif completed + 1 >= total:
                # if all or all but one are good, call the job good.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            if debug:
                detail = ['%s=%s' % (status, by_status[status])
                          for status in by_status if status != 'Total']
                print('%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail)))
                print('')

        if len(bad_platforms) > 0:
            if debug:
                print('Result bad - platforms: ' + ' '.join(bad_platforms))
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print('Result unknown - platforms:  %s' % platform_list)
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print('Result good - all platforms passed:  %s' % platform_list)
        return True
381
382
class rpc_object(object):
    """
    Base for the python objects built from the dictionaries that RPC calls
    return.
    """
    def __init__(self, afe, hash):
        """
        afe: client object the dictionary came from
        hash: dictionary of field name -> value; every entry also becomes
              an attribute of this object
        """
        self.afe = afe
        self.hash = hash
        # Applied last, exactly like the attributes above were set first.
        vars(self).update(hash)


    def __str__(self):
        header = self.__repr__()
        return dump_object(header, self)
395
396
class label(rpc_object):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Attach this label to hosts via the label_add_hosts RPC.
        """
        # run() only accepts keyword arguments for the RPC, so id and hosts
        # must be named; passing them positionally raised TypeError.
        return self.afe.run('label_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Detach this label from hosts via the label_remove_hosts RPC.
        """
        return self.afe.run('label_remove_hosts', id=self.id, hosts=hosts)
414
415
class acl(rpc_object):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Add hosts to this ACL group via the acl_group_add_hosts RPC.
        """
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        # run() only accepts keyword arguments for the RPC, so id and hosts
        # must be named; passing them positionally raised TypeError.
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Remove hosts from this ACL group via the acl_group_remove_hosts RPC.
        """
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)
435
436
class job(rpc_object):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        # Jobs are identified by id alone.
        return 'JOB: %s' % (self.id,)
448
449
class job_status(rpc_object):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id
    """
    def __init__(self, afe, hash):
        """
        Wrap one host queue entry dictionary.

        The raw 'job' dictionary is replaced by a job object and, when a
        host is present, the raw 'host' dictionary is replaced by a host
        object fetched again through afe.get_hosts().
        """
        super(job_status, self).__init__(afe, hash)
        self.job = job(afe, self.job)
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        # host is left untouched (and falsy) when the entry has no host,
        # so do not assume it has a hostname attribute.
        if self.host:
            hostname = self.host.hostname
        else:
            hostname = None
        return 'JOB STATUS: %s-%s' % (self.job.id, hostname)
469
470
class host(rpc_object):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        return 'HOST OBJECT: %s' % (self.hostname,)


    def show(self):
        """
        Print one line with hostname, status, locked flag, platform and the
        labels other than the platform.
        """
        other_labels = set(self.labels) - set([self.platform])
        columns = (self.hostname, self.status, self.locked, self.platform,
                   ', '.join(other_labels))
        print('%-6s %-7s %-7s %-16s %s' % columns)


    def get_acls(self):
        """
        Return the acl objects that list this host.
        """
        name = self.hostname
        return self.afe.get_acls(hosts__hostname=name)


    def add_acl(self, acl_name):
        """
        Add this host to the ACL group given by acl_name.
        """
        message = 'Adding ACL %s to host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        members = [self.hostname]
        return self.afe.run('acl_group_add_hosts', id=acl_name, hosts=members)


    def remove_acl(self, acl_name):
        """
        Remove this host from the ACL group given by acl_name.
        """
        message = 'Removing ACL %s from host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        members = [self.hostname]
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=members)


    def get_labels(self):
        """
        Return the label objects attached to this host.
        """
        names = [self.hostname]
        return self.afe.get_labels(host__hostname__in=names)


    def add_labels(self, labels):
        """
        Attach labels to this host via the host_add_labels RPC.
        """
        message = 'Adding labels %s to host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        """
        Detach labels from this host via the host_remove_labels RPC.
        """
        message = 'Removing labels %s from host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)
mbligh5b618382008-12-03 15:24:01 +0000518
519
class test_status(rpc_object):
    """
    TKO test status object

    Fields:
        test_idx, hostname, testname, id
        complete_count, incomplete_count, group_count, pass_count
    """
    def __repr__(self):
        # Identified by the id field of the wrapped dictionary.
        return 'TEST STATUS: %s' % (self.id,)
530
531
class MachineTestPairing(object):
    """
    Ties a machine label to the control file that should be run on the
    machines carrying that label.
    """
    def __init__(self, machine_label, control_file):
        """
        machine_label: label selecting the machines
        control_file: control file to run on them
        """
        self.machine_label, self.control_file = machine_label, control_file