blob: ee16e7661b590fa25a8fe96ce537cd0188c9804c [file] [log] [blame]
mbligh67647152008-11-19 00:18:14 +00001# Copyright Martin J. Bligh, Google Inc 2008
2# Released under the GPL v2
3
"""
This module allows you to communicate with the frontend to submit jobs etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries into real objects that are more idiomatic.

For docs, see http://autotest/afe/server/noauth/rpc/
http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
"""
14
mbligh5b618382008-12-03 15:24:01 +000015import os, time, traceback
mbligh67647152008-11-19 00:18:14 +000016import common
17from autotest_lib.frontend.afe import rpc_client_lib
18from autotest_lib.client.common_lib import utils
19
20
def dump_object(header, obj):
    """
    Render a frontend object (eg job, host, acl, label) as readable text
    for debugging.

    header: first line of the output
    obj: object exposing its fields through a dictionary called 'hash'

    Returns the header followed by one right-aligned 'key: value' line per
    entry of obj.hash; the bookkeeping keys 'afe' and 'hash' are left out.
    """
    pieces = [header + '\n']
    for field in obj.hash:
        if field in ('afe', 'hash'):
            continue
        pieces.append('%20s: %s\n' % (field, obj.hash[field]))
    return ''.join(pieces)
32
33
class afe(object):
    """
    AFE class for communicating with the autotest frontend

    All the constructors go in the afe class.
    Manipulating methods go in the classes themselves
    """
    def __init__(self, user=os.environ.get('LOGNAME'),
                 web_server='http://autotest', print_log=True, debug=False):
        """
        Set up an RPC proxy to the AFE

        user: username to connect as (the default is read from $LOGNAME
              once, when this module is imported)
        web_server: AFE instance to connect to
        print_log: print a logging message to stdout on every operation
        debug: print out all RPC traffic
        """
        self.user = user
        self.print_log = print_log
        self.debug = debug
        headers = {'AUTHORIZATION' : self.user}
        rpc_server = web_server + '/afe/server/noauth/rpc/'
        self.proxy = rpc_client_lib.get_proxy(rpc_server, headers=headers)


    def run(self, call, **dargs):
        """
        Make a RPC call to the AFE server

        call: name of the RPC method, looked up on the proxy
        dargs: keyword arguments for the RPC (positional ones are not
               accepted)

        Returns the RPC result passed through utils.strip_unicode
        """
        rpc_call = getattr(self.proxy, call)
        if self.debug:
            print('DEBUG: %s %s' % (call, dargs))
        return utils.strip_unicode(rpc_call(**dargs))


    def log(self, message):
        """
        Print message to stdout, unless logging was disabled with
        print_log=False
        """
        if self.print_log:
            print(message)


    def host_statuses(self, live=None):
        """
        Return a list of host status names

        live: True  - only the statuses of usable hosts, in server order
              False - only the dead statuses ('Dead', 'Repair Failed')
              None  - every status the server knows about
        """
        dead_statuses = ['Dead', 'Repair Failed']
        if live == False:
            # The answer is fixed; no need to ask the server.
            return dead_statuses
        statuses = self.run('get_static_data')['host_statuses']
        if live == True:
            # Filter in place of a set difference so the order is stable.
            return [s for s in statuses if s not in dead_statuses]
        return statuses


    def get_hosts(self, **dargs):
        """
        Return a list of host objects matching the filter in dargs
        """
        hosts = self.run('get_hosts', **dargs)
        return [host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        """
        Add a host called hostname and return the resulting host object
        """
        # hostname has to be forwarded explicitly; it is not part of dargs.
        new_id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=new_id)[0]


    def get_labels(self, **dargs):
        """
        Return a list of label objects matching the filter in dargs
        """
        labels = self.run('get_labels', **dargs)
        return [label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        """
        Add a label called name and return the resulting label object
        """
        new_id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=new_id)[0]


    def get_acls(self, **dargs):
        """
        Return a list of acl objects matching the filter in dargs
        """
        acls = self.run('get_acl_groups', **dargs)
        return [acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        """
        Add an ACL group called name and return the resulting acl object
        """
        new_id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=new_id)[0]


    def get_jobs(self, summary=False, **dargs):
        """
        Return a list of job objects matching the filter in dargs

        summary: use the 'get_jobs_summary' RPC instead of 'get_jobs'
        """
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        """
        Return a list of job_status objects matching the filter in data
        """
        entries = self.run('get_host_queue_entries', **data)
        return [job_status(self, e) for e in entries]


    def create_job_by_test(self, tests, kernel=None, **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it

        tests: tests to generate the control file from
        kernel: kernel passed to the control file generator
        dargs: extra arguments for create_job(); 'dependencies' is extended
               with what the generated control file requires

        Returns the new job object
        """
        results = self.run('generate_control_file', tests=tests, kernel=kernel,
                           use_container=False, do_push_packages=True)
        if results['is_server']:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        # list() so that a caller-supplied tuple can be concatenated too
        dargs['dependencies'] = (list(dargs.get('dependencies', [])) +
                                 list(results['dependencies']))
        dargs['control_file'] = results['control_file']
        dargs['synch_count'] = results['synch_count']
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        """
        Submit a job running control_file and return the new job object

        Any extra keyword arguments are passed on to the 'create_job' RPC
        """
        new_id = self.run('create_job', name=name, priority=priority,
                          control_file=control_file,
                          control_type=control_type, **dargs)
        return self.get_jobs(id=new_id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label, wait=True,
                        poll_interval=5, email_from=None, email_to=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                                    (<kernel-version> : <config> : <date>)
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns None straight away if wait is false, otherwise the first
        True/False verdict reported by poll_all_jobs()
        """
        jobs = []
        for pairing in pairings:
            new_job = self.invoke_test(pairing, kernel, kernel_label)
            # poll_all_jobs() uses this flag to report each job only once
            new_job.notified = False
            jobs.append(new_job)
            if email_from and email_to:
                subject = 'Testing started: %s : %s' % (new_job.name,
                                                        new_job.id)
                utils.send_email(email_from, email_to, subject, subject)
        if not wait:
            return
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print, if email data
        is provided, will send email for it as well.

        job: job object to notify about; its 'result' and
             'results_platform_map' attributes must already be set
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        # NOTE(review): the subject ends in a newline; confirm that
        # utils.send_email copes with that before changing it.
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                hosts = ','.join(job.results_platform_map[platform][status])
                text.append('%20s %10s %s' % (platform, status, hosts))
        text.append("\nhttp://autotest/tko/compose_query.cgi?columns=test&rows=machine_group&condition=tag~'%s-%%25'&title=Report" % job.id)
        body = "\n".join(text)
        print("---------------------------------------------------")
        print('Subject:  %s' % subject)
        print(body)
        print("---------------------------------------------------")
        if email_from and email_to:
            print("Sending email ...")
            utils.send_email(email_from, email_to, subject, body)
        print('')


    def poll_all_jobs(self, jobs, email_from, email_to):
        """
        Poll all jobs in a list.
            jobs: list of job objects to poll
            email_from: send notification email upon completion from here
            email_to: send notification email upon completion to here

        Each job gets its 'result' attribute refreshed, and is reported
        through result_notify() the first time it has a verdict.

        Returns:
            a) All complete successfully (return True)
            b) One or more has failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            job.result = self.poll_job_results(job, debug=False)
            results.append(job.result)
            # Jobs that did not come from run_test_suites() have no
            # 'notified' flag yet; treat them as not reported.
            if job.result is not None and not getattr(job, 'notified', False):
                self.result_notify(job, email_from, email_to)
                job.notified = True

            if job.result is None:
                state = 'PENDING'
            elif job.result:
                state = 'PASSED'
            else:
                state = 'FAILED'
            print('%s  %s : %s' % (state, job.id, job.name))

        if None in results:
            return None
        elif False in results:
            return False
        else:
            return True


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Hosts whose status is 'Repair Failed' are left out.

        Returns a job object
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        new_job = self.create_job_by_test(name=job_name,
                                     dependencies=[pairing.machine_label],
                                     tests=[pairing.control_file],
                                     priority=priority,
                                     hosts=host_list,
                                     kernel=kernel)
        print('Invoked test %s : %s' % (new_job.id, job_name))
        return new_job


    def poll_job_results(self, job, debug=False):
        """
        Analyse all job results by platform, return:

            False: if any platform has more than one failure
            None:  if any platform has more than one machine not yet Good.
            True:  if all platforms have at least all-but-one machines Good.

        As a side effect job.results_platform_map is set to
        {platform: {'Total': [hostnames], <status>: [hostnames], ...}}.
        An RPC failure while fetching the queue entries is printed and
        reported as None (cannot tell yet).
        """
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return None

        # NOTE(review): entries without an assigned host (job_status allows
        # a false 'host') would fail on the attribute access below - confirm
        # whether such entries can reach this point.
        platform_map = {}
        for entry in job_statuses:
            hostname = entry.host.hostname
            status = entry.status
            platform = entry.host.platform
            if platform not in platform_map:
                platform_map[platform] = {'Total' : [hostname]}
            else:
                platform_map[platform]['Total'].append(hostname)
            new_host_list = platform_map[platform].get(status, []) + [hostname]
            platform_map[platform][status] = new_host_list
        job.results_platform_map = platform_map

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        for platform in platform_map:
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif completed + 1 >= total:
                # if all or all but one are good, call the job good.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            if debug:
                detail = []
                for status in platform_map[platform]:
                    if status == 'Total':
                        continue
                    detail.append('%s=%s' % (status,
                                             platform_map[platform][status]))
                print('%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail)))
                print('')

        if len(bad_platforms) > 0:
            if debug:
                print('Result bad - platforms: ' + ' '.join(bad_platforms))
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print('Result unknown - platforms:  %s' % platform_list)
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print('Result good - all platforms passed:  %s' % platform_list)
        return True
340
341
class rpc_object(object):
    """
    Base class that turns the dictionary returned by an RPC call into a
    python object: every key of the dictionary becomes an attribute.
    """
    def __init__(self, afe, hash):
        """
        afe: connection object, kept so that methods can issue further RPCs
        hash: dictionary of field name -> value received from the server
        """
        self.afe = afe
        self.hash = hash
        # Copy the fields last, exactly like a plain attribute assignment
        # per key would.
        vars(self).update(hash)


    def __str__(self):
        """
        Human-readable dump: the object's repr followed by its fields
        """
        return dump_object(self.__repr__(), self)
354
355
class label(rpc_object):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Attach this label to the given hosts

        hosts: list of hosts to add

        Returns the result of the 'label_add_hosts' RPC
        """
        # afe.run() only takes keyword arguments, so the RPC parameters have
        # to be named. NOTE(review): 'id'/'hosts' follow the names the host
        # class uses for the ACL RPCs - confirm against the RPC interface.
        return self.afe.run('label_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Detach this label from the given hosts

        hosts: list of hosts to remove

        Returns the result of the 'label_remove_hosts' RPC
        """
        return self.afe.run('label_remove_hosts', id=self.id, hosts=hosts)
373
374
class acl(rpc_object):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Add the given hosts to this ACL group

        hosts: list of hosts to add

        Returns the result of the 'acl_group_add_hosts' RPC
        """
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        # afe.run() only takes keyword arguments; the names match the ones
        # host.add_acl() passes to the same RPC.
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Remove the given hosts from this ACL group

        hosts: list of hosts to remove

        Returns the result of the 'acl_group_remove_hosts' RPC
        """
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)
394
395
class job(rpc_object):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        # Jobs are identified by their numeric id rather than their name.
        label_text = 'JOB: %s' % self.id
        return label_text
407
408
class job_status(rpc_object):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id
    """
    def __init__(self, afe, hash):
        """
        afe: connection object used for follow-up RPCs
        hash: queue entry dictionary; its 'job' entry is wrapped in a job
              object and a non-empty 'host' entry is replaced by the full
              host object fetched from the server
        """
        super(job_status, self).__init__(afe, hash)
        self.job = job(afe, self.job)
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        # The constructor leaves a false 'host' untouched, so there may be
        # no host object to take the name from.
        if self.host:
            hostname = self.host.hostname
        else:
            hostname = None
        return 'JOB STATUS: %s-%s' % (self.job.id, hostname)
428
429
class host(rpc_object):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        return 'HOST OBJECT: %s' % self.hostname


    def show(self):
        """
        Print a one-line summary: hostname, status, lock state, platform
        and the remaining labels (the platform label is not repeated)
        """
        labels = list(set(self.labels) - set([self.platform]))
        columns = (self.hostname, self.status, self.locked, self.platform,
                   ', '.join(labels))
        print('%-6s %-7s %-7s %-16s %s' % columns)


    def get_acls(self):
        """
        Return the acl objects this host belongs to
        """
        return self.afe.get_acls(hosts__hostname=self.hostname)


    def add_acl(self, acl_name):
        """
        Add this host to the ACL group acl_name
        """
        message = 'Adding ACL %s to host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        return self.afe.run('acl_group_add_hosts', id=acl_name,
                            hosts=[self.hostname])


    def remove_acl(self, acl_name):
        """
        Remove this host from the ACL group acl_name
        """
        message = 'Removing ACL %s from host %s' % (acl_name, self.hostname)
        self.afe.log(message)
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=[self.hostname])


    def get_labels(self):
        """
        Return the label objects attached to this host
        """
        return self.afe.get_labels(host__hostname__in=[self.hostname])


    def add_labels(self, labels):
        """
        Attach the given labels to this host
        """
        message = 'Adding labels %s to host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        """
        Detach the given labels from this host
        """
        message = 'Removing labels %s from host %s' % (labels, self.hostname)
        self.afe.log(message)
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)
mbligh5b618382008-12-03 15:24:01 +0000477
478
class MachineTestPairing(object):
    """
    Couples a machine label with the control file to run on the machines
    carrying that label
    """
    def __init__(self, machine_label, control_file):
        self.machine_label, self.control_file = machine_label, control_file
485 self.control_file = control_file