# Copyright Martin J. Bligh, Google Inc 2008
# Released under the GPL v2

"""
This module allows you to communicate with the frontend to submit jobs, etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries into real objects that are more idiomatic.

For docs, see http://autotest/afe/server/noauth/rpc/
http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
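
Example usage (a minimal sketch; it assumes this module is imported as
autotest_lib.server.frontend, and the host and test names are hypothetical):

    from autotest_lib.server import frontend
    afe_obj = frontend.afe(web_server='http://autotest')
    job = afe_obj.create_job_by_test(name='sleeptest smoke',
                                     tests=['sleeptest'],
                                     hosts=['test-host-1'])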
"""

import os, time, traceback
import common
from autotest_lib.frontend.afe import rpc_client_lib
from autotest_lib.client.common_lib import utils


def dump_object(header, obj):
    """
    Standard way to print out the frontend objects (e.g. job, host, acl,
    label) in a human-readable fashion for debugging.
    """
    result = header + '\n'
    for key in obj.hash:
        if key == 'afe' or key == 'hash':
            continue
        result += '%20s: %s\n' % (key, obj.hash[key])
    return result


class afe(object):
    """
    AFE class for communicating with the autotest frontend.

    All the constructors go in the afe class.
    Manipulating methods go in the classes themselves.
    """
    def __init__(self, user=os.environ.get('LOGNAME'),
                 web_server='http://autotest', print_log=True, debug=False):
        """
        Create a cached instance of a connection to the AFE.

        user: username to connect as
        web_server: AFE instance to connect to
        print_log: print a logging message to stdout on every operation
        debug: print out all RPC traffic
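
        A minimal construction example (the username and server shown are
        illustrative; they default to $LOGNAME and http://autotest):

            afe_obj = afe(user='autotest-user', web_server='http://autotest',
                          debug=True)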
        """
        self.user = user
        self.print_log = print_log
        self.debug = debug
        headers = {'AUTHORIZATION' : self.user}
        rpc_server = web_server + '/afe/server/noauth/rpc/'
        self.proxy = rpc_client_lib.get_proxy(rpc_server, headers=headers)


    def run(self, call, **dargs):
        """
        Make an RPC call to the AFE server.
        """
        rpc_call = getattr(self.proxy, call)
        if self.debug:
            print 'DEBUG: %s %s' % (call, dargs)
        return utils.strip_unicode(rpc_call(**dargs))


    def log(self, message):
        if self.print_log:
            print message


    def host_statuses(self, live=None):
        dead_statuses = ['Dead', 'Repair Failed']
        statuses = self.run('get_static_data')['host_statuses']
        if live == True:
            return list(set(statuses) - set(dead_statuses))
        if live == False:
            return dead_statuses
        else:
            return statuses


    def get_hosts(self, **dargs):
        hosts = self.run('get_hosts', **dargs)
        return [host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=id)[0]


    def get_labels(self, **dargs):
        labels = self.run('get_labels', **dargs)
        return [label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=id)[0]


    def get_acls(self, **dargs):
        acls = self.run('get_acl_groups', **dargs)
        return [acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=id)[0]


    def get_jobs(self, summary=False, **dargs):
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        entries = self.run('get_host_queue_entries', **data)
        return [job_status(self, e) for e in entries]


    def create_job_by_test(self, tests, kernel=None, **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it.
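
        Example (a sketch; assumes afe_obj is an afe instance, and the test,
        host and kernel names are hypothetical):
            job = afe_obj.create_job_by_test(name='sleeptest smoke',
                                             tests=['sleeptest'],
                                             hosts=['host1'],
                                             kernel='2.6.27-rc1')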
        """
        results = self.run('generate_control_file', tests=tests, kernel=kernel,
                           use_container=False, do_push_packages=True)
        if results['is_server']:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = dargs.get('dependencies', []) + \
                                results['dependencies']
        dargs['control_file'] = results['control_file']
        dargs['synch_count'] = results['synch_count']
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        id = self.run('create_job', name=name, priority=priority,
                      control_file=control_file, control_type=control_type,
                      **dargs)
        return self.get_jobs(id=id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label, wait=True,
                        poll_interval=5, email_from=None, email_to=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                      (<kernel-version> : <config> : <date>)
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
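
        Example (a sketch; assumes afe_obj is an afe instance, and the machine
        labels, control files and kernel names are hypothetical):
            pairings = [MachineTestPairing('netbook', 'sleeptest'),
                        MachineTestPairing('bigiron', 'dbench')]
            passed = afe_obj.run_test_suites(pairings, '2.6.27',
                                             '2.6.27 : smp : 2008-12-09')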
        """
        jobs = []
        for pairing in pairings:
            job = self.invoke_test(pairing, kernel, kernel_label)
            job.notified = False
            jobs.append(job)
            if email_from and email_to:
                subject = 'Testing started: %s : %s' % (job.name, job.id)
                utils.send_email(email_from, email_to, subject, subject)
        if not wait:
            return
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print; if email data
        is provided, an email will be sent as well.

        job: job object to notify about
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                hosts = ','.join(job.results_platform_map[platform][status])
                text.append('%20s %10s %s' % (platform, status, hosts))
        text.append("\nhttp://autotest/tko/compose_query.cgi?columns=test&rows=machine_group&condition=tag~'%s-%%25'&title=Report" % job.id)
        body = "\n".join(text)
        print "---------------------------------------------------"
        print "Subject: ", subject
        print body
        print "---------------------------------------------------"
        if email_from and email_to:
            print "Sending email ..."
            utils.send_email(email_from, email_to, subject, body)
        print


    def poll_all_jobs(self, jobs, email_from, email_to):
        """
        Poll all jobs in a list.

        jobs: list of job objects to poll
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns one of:
            a) All complete successfully (return True)
            b) One or more has failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            job.result = self.poll_job_results(job, debug=False)
            results.append(job.result)
            if job.result is not None and not job.notified:
                self.result_notify(job, email_from, email_to)
                job.notified = True

            if job.result is None:
                print 'PENDING',
            elif job.result == True:
                print 'PASSED',
            elif job.result == False:
                print 'FAILED',
            print ' %s : %s' % (job.id, job.name)

        if None in results:
            return None
        elif False in results:
            return False
        else:
            return True


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Returns a job object.
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        new_job = self.create_job_by_test(name=job_name,
                                          dependencies=[pairing.machine_label],
                                          tests=[pairing.control_file],
                                          priority=priority,
                                          hosts=host_list,
                                          kernel=kernel)
        print 'Invoked test %s : %s' % (new_job.id, job_name)
        return new_job


    def poll_job_results(self, job, debug=False):
        """
        Analyse all job results by platform, and return:

        False: if any platform has more than one failure
        None: if any platform has more than one machine not yet Good
        True: if all platforms have at least all-but-one machines Good
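
        Worked example (hypothetical hosts): for a platform whose entries are
            {'Total': ['h1', 'h2', 'h3'], 'Completed': ['h1', 'h2'],
             'Failed': ['h3']}
        failed is 1 (not more than one) and completed + 1 >= total, so that
        platform still counts as good.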
        """
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print "Ignoring exception on poll job; RPC interface is flaky"
            traceback.print_exc()
            return None

        platform_map = {}
        for job_status in job_statuses:
            hostname = job_status.host.hostname
            status = job_status.status
            platform = job_status.host.platform
            if platform not in platform_map:
                platform_map[platform] = {'Total' : [hostname]}
            else:
                platform_map[platform]['Total'].append(hostname)
            new_host_list = platform_map[platform].get(status, []) + [hostname]
            platform_map[platform][status] = new_host_list
        job.results_platform_map = platform_map

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        for platform in platform_map:
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif completed + 1 >= total:
                # if all or all but one are good, call the job good.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            detail = []
            for status in platform_map[platform]:
                if status == 'Total':
                    continue
                detail.append('%s=%s' % (status,platform_map[platform][status]))
            if debug:
                print '%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail))
                print

        if len(bad_platforms) > 0:
            if debug:
                print 'Result bad - platforms: ' + ' '.join(bad_platforms)
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print 'Result unknown - platforms: ', platform_list
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print 'Result good - all platforms passed: ', platform_list
        return True


class rpc_object(object):
    """
    Generic object used to construct python objects from rpc calls
    """
    def __init__(self, afe, hash):
        self.afe = afe
        self.hash = hash
        self.__dict__.update(hash)


    def __str__(self):
        return dump_object(self.__repr__(), self)


class label(rpc_object):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        return self.afe.run('label_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        return self.afe.run('label_remove_hosts', id=self.id, hosts=hosts)


class acl(rpc_object):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)


class job(rpc_object):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        return 'JOB: %s' % self.id


class job_status(rpc_object):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id
    """
    def __init__(self, afe, hash):
        super(job_status, self).__init__(afe, hash)
        self.job = job(afe, self.job)
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        return 'JOB STATUS: %s-%s' % (self.job.id, self.host.hostname)


class host(rpc_object):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        return 'HOST OBJECT: %s' % self.hostname


    def show(self):
        labels = list(set(self.labels) - set([self.platform]))
        print '%-6s %-7s %-7s %-16s %s' % (self.hostname, self.status,
                                           self.locked, self.platform,
                                           ', '.join(labels))


    def get_acls(self):
        return self.afe.get_acls(hosts__hostname=self.hostname)


    def add_acl(self, acl_name):
        self.afe.log('Adding ACL %s to host %s' % (acl_name, self.hostname))
        return self.afe.run('acl_group_add_hosts', id=acl_name,
                            hosts=[self.hostname])


    def remove_acl(self, acl_name):
        self.afe.log('Removing ACL %s from host %s' % (acl_name, self.hostname))
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=[self.hostname])


    def get_labels(self):
        return self.afe.get_labels(host__hostname__in=[self.hostname])


    def add_labels(self, labels):
        self.afe.log('Adding labels %s to host %s' % (labels, self.hostname))
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        self.afe.log('Removing labels %s from host %s' % (labels,self.hostname))
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)


class MachineTestPairing(object):
    """
    Object representing the pairing of a machine label with a control file
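
    Example (hypothetical machine label and control file names):
        pairing = MachineTestPairing('netbook', 'sleeptest')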
    """
    def __init__(self, machine_label, control_file):
        self.machine_label = machine_label
        self.control_file = control_file