# Copyright Martin J. Bligh, Google Inc 2008
# Released under the GPL v2

"""
This module allows you to communicate with the frontend to submit jobs, etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries returned by the RPC calls into real objects
that are more idiomatic to work with.

For docs, see:
    http://autotest/afe/server/noauth/rpc/
    http://autotest/new_tko/server/noauth/rpc/
    http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
"""
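
# A minimal usage sketch (illustrative only: it assumes an AFE instance is
# reachable at the default http://autotest, that this module is importable as
# autotest_lib.server.frontend, and that 'Ready' is a valid host status on
# that server):
#
#     from autotest_lib.server import frontend
#     afe_client = frontend.afe(debug=True)
#     for h in afe_client.get_hosts(status='Ready'):
#         h.show()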

import os, time, traceback
import common
from autotest_lib.frontend.afe import rpc_client_lib
from autotest_lib.client.common_lib import utils


def dump_object(header, obj):
    """
    Standard way to print out the frontend objects (eg job, host, acl, label)
    in a human-readable fashion for debugging
    """
    result = header + '\n'
    for key in obj.hash:
        if key == 'afe' or key == 'hash':
            continue
        result += '%20s: %s\n' % (key, obj.hash[key])
    return result


class rpc_client(object):
    """
    Abstract RPC class for communicating with the autotest frontend.

    All the constructors go in the afe class.
    Manipulating methods go in the classes themselves.
    """
    def __init__(self, path, user, web_server, print_log, debug):
        """
        Create a cached instance of a connection to the AFE

        path: path of the RPC interface on the web server
        user: username to connect as
        web_server: AFE instance to connect to
        print_log: print a logging message to stdout on every operation
        debug: print out all RPC traffic
        """
        if not user:
            user = os.environ.get('LOGNAME')
        if not web_server:
            web_server = 'http://autotest'
        self.user = user
        self.print_log = print_log
        self.debug = debug
        headers = {'AUTHORIZATION' : self.user}
        rpc_server = web_server + path
        self.proxy = rpc_client_lib.get_proxy(rpc_server, headers=headers)


    def run(self, call, **dargs):
        """
        Make a RPC call to the AFE server
        """
        rpc_call = getattr(self.proxy, call)
        if self.debug:
            print 'DEBUG: %s %s' % (call, dargs)
        return utils.strip_unicode(rpc_call(**dargs))

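    # A usage sketch for run() above (illustrative only: 'client' stands for
    # any afe or tko instance, 'get_jobs' is one of the RPC calls the AFE
    # server exposes, and id=42 is a made-up filter value):
    #
    #     jobs_data = client.run('get_jobs', id=42)
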

    def log(self, message):
        if self.print_log:
            print message


class tko(rpc_client):
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        super(tko, self).__init__('/new_tko/server/noauth/rpc/', user,
                                  web_server, print_log, debug)


    def get_status_counts(self, job, **data):
        entries = self.run('get_status_counts',
                           group_by=['hostname', 'test_name'],
                           job_tag__startswith='%s-' % job, **data)
        return [test_status(self, e) for e in entries['groups']]


class afe(rpc_client):
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        super(afe, self).__init__('/afe/server/noauth/rpc/', user, web_server,
                                  print_log, debug)


    def host_statuses(self, live=None):
        dead_statuses = ['Dead', 'Repair Failed']
        statuses = self.run('get_static_data')['host_statuses']
        if live == True:
            return list(set(statuses) - set(dead_statuses))
        if live == False:
            return dead_statuses
        else:
            return statuses


    def get_hosts(self, **dargs):
        hosts = self.run('get_hosts', **dargs)
        return [host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=id)[0]


    def get_labels(self, **dargs):
        labels = self.run('get_labels', **dargs)
        return [label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=id)[0]


    def get_acls(self, **dargs):
        acls = self.run('get_acl_groups', **dargs)
        return [acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=id)[0]


    def get_jobs(self, summary=False, **dargs):
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        entries = self.run('get_host_queue_entries', **data)
        return [job_status(self, e) for e in entries]


    def create_job_by_test(self, tests, kernel=None, **dargs):
        """
        Given a list of tests, fetch the appropriate control file from the
        server and submit it as a new job
        """
        results = self.run('generate_control_file', tests=tests, kernel=kernel,
                           use_container=False, do_push_packages=True)
        if results['is_server']:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = dargs.get('dependencies', []) + \
                                results['dependencies']
        dargs['control_file'] = results['control_file']
        dargs['synch_count'] = results['synch_count']
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        id = self.run('create_job', name=name, priority=priority,
                      control_file=control_file, control_type=control_type,
                      **dargs)
        return self.get_jobs(id=id)[0]

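    # A usage sketch for create_job_by_test() above (illustrative only:
    # 'sleeptest' and the hostname are invented examples; any installed
    # client test and any host known to the AFE would do):
    #
    #     new_job = afe_instance.create_job_by_test(tests=['sleeptest'],
    #                                               name='sleeptest smoke run',
    #                                               hosts=['test-machine-1'])
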

    def run_test_suites(self, pairings, kernel, kernel_label, wait=True,
                        poll_interval=5, email_from=None, email_to=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                      (<kernel-version> : <config> : <date>)
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        jobs = []
        for pairing in pairings:
            job = self.invoke_test(pairing, kernel, kernel_label)
            job.notified = False
            jobs.append(job)
            if email_from and email_to:
                subject = 'Testing started: %s : %s' % (job.name, job.id)
                utils.send_email(email_from, email_to, subject, subject)
        if not wait:
            return
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(jobs, email_from, email_to)
            if result is not None:
                return result


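    # A usage sketch for run_test_suites() above (illustrative only: the
    # machine label, control file and kernel names are invented, and
    # MachineTestPairing is defined at the bottom of this module):
    #
    #     pairing = MachineTestPairing('my-machine-label', 'kernbench')
    #     afe_instance.run_test_suites([pairing], '2.6.28-rc1',
    #                                  '2.6.28-rc1 : mydefconfig : 2008-12-01',
    #                                  email_from='me@example.com',
    #                                  email_to='team@example.com')
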
    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print the results;
        if email data is provided, will send a notification email as well.

        job: job object to notify about
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                hosts = ','.join(job.results_platform_map[platform][status])
                text.append('%20s %10s %s' % (platform, status, hosts))
        text.append("\nhttp://autotest/tko/compose_query.cgi?columns=test&rows=machine_group&condition=tag~'%s-%%25'&title=Report" % job.id)
        body = "\n".join(text)
        print "---------------------------------------------------"
        print "Subject: ", subject
        print body
        print "---------------------------------------------------"
        if email_from and email_to:
            print "Sending email ..."
            utils.send_email(email_from, email_to, subject, body)
        print


    def poll_all_jobs(self, jobs, email_from, email_to):
        """
        Poll all jobs in a list.

        jobs: list of job objects to poll
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns:
            a) All jobs completed successfully (return True)
            b) One or more jobs failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            job.result = self.poll_job_results(job, debug=False)
            results.append(job.result)
            if job.result is not None and not job.notified:
                self.result_notify(job, email_from, email_to)
                job.notified = True

            if job.result is None:
                print 'PENDING',
            elif job.result == True:
                print 'PASSED',
            elif job.result == False:
                print 'FAILED',
            print ' %s : %s' % (job.id, job.name)

        if None in results:
            return None
        elif False in results:
            return False
        else:
            return True


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Returns a job object
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        new_job = self.create_job_by_test(name=job_name,
                                          dependencies=[pairing.machine_label],
                                          tests=[pairing.control_file],
                                          priority=priority,
                                          hosts=host_list,
                                          kernel=kernel)
        print 'Invoked test %s : %s' % (new_job.id, job_name)
        return new_job


    def poll_job_results(self, job, debug=False):
        """
        Analyse all job results by platform, return:

            False: if any platform has more than one failure
            None: if any platform has more than one machine not yet Good.
            True: if all platforms have at least all-but-one machines Good.
        """
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print "Ignoring exception on poll job; RPC interface is flaky"
            traceback.print_exc()
            return None

        platform_map = {}
        for job_status in job_statuses:
            hostname = job_status.host.hostname
            status = job_status.status
            platform = job_status.host.platform
            if platform not in platform_map:
                platform_map[platform] = {'Total' : [hostname]}
            else:
                platform_map[platform]['Total'].append(hostname)
            new_host_list = platform_map[platform].get(status, []) + [hostname]
            platform_map[platform][status] = new_host_list
        job.results_platform_map = platform_map

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        for platform in platform_map:
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif completed + 1 >= total:
                # if all or all but one are good, call the job good.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            detail = []
            for status in platform_map[platform]:
                if status == 'Total':
                    continue
                detail.append('%s=%s' % (status, platform_map[platform][status]))
            if debug:
                print '%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail))
                print

        if len(bad_platforms) > 0:
            if debug:
                print 'Result bad - platforms: ' + ' '.join(bad_platforms)
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print 'Result unknown - platforms: ', platform_list
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print 'Result good - all platforms passed: ', platform_list
        return True


class rpc_object(object):
    """
    Generic object used to construct python objects from rpc calls
    """
    def __init__(self, afe, hash):
        self.afe = afe
        self.hash = hash
        self.__dict__.update(hash)


    def __str__(self):
        return dump_object(self.__repr__(), self)


class label(rpc_object):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        return self.afe.run('label_add_hosts', self.id, hosts)


    def remove_hosts(self, hosts):
        return self.afe.run('label_remove_hosts', self.id, hosts)


class acl(rpc_object):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_add_hosts', self.id, hosts)


    def remove_hosts(self, hosts):
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', self.id, hosts)


class job(rpc_object):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        return 'JOB: %s' % self.id


class job_status(rpc_object):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id
    """
    def __init__(self, afe, hash):
        super(job_status, self).__init__(afe, hash)
        self.job = job(afe, self.job)
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        return 'JOB STATUS: %s-%s' % (self.job.id, self.host.hostname)


class host(rpc_object):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        return 'HOST OBJECT: %s' % self.hostname


    def show(self):
        labels = list(set(self.labels) - set([self.platform]))
        print '%-6s %-7s %-7s %-16s %s' % (self.hostname, self.status,
                                           self.locked, self.platform,
                                           ', '.join(labels))


    def get_acls(self):
        return self.afe.get_acls(hosts__hostname=self.hostname)


    def add_acl(self, acl_name):
        self.afe.log('Adding ACL %s to host %s' % (acl_name, self.hostname))
        return self.afe.run('acl_group_add_hosts', id=acl_name,
                            hosts=[self.hostname])


    def remove_acl(self, acl_name):
        self.afe.log('Removing ACL %s from host %s' % (acl_name, self.hostname))
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=[self.hostname])


    def get_labels(self):
        return self.afe.get_labels(host__hostname__in=[self.hostname])


    def add_labels(self, labels):
        self.afe.log('Adding labels %s to host %s' % (labels, self.hostname))
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        self.afe.log('Removing labels %s from host %s' % (labels, self.hostname))
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)
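
    # A usage sketch for the label/ACL helpers above (illustrative only: the
    # hostname and label are invented and assumed to already exist on the AFE
    # server):
    #
    #     h = afe_instance.get_hosts(hostname='test-machine-1')[0]
    #     h.add_labels(['needs-bios-update'])
    #     print h.get_labels()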


class test_status(rpc_object):
    """
    TKO test status object

    Fields:
        test_idx, hostname, testname, id,
        complete_count, incomplete_count, group_count, pass_count
    """
    def __repr__(self):
        return 'TEST STATUS: %s' % self.id


class MachineTestPairing(object):
    """
    Object representing the pairing of a machine label with a control file
    """
    def __init__(self, machine_label, control_file):
        self.machine_label = machine_label
        self.control_file = control_file