Blame - scripts/decodetree.py - chromium.googlesource.com/chromiumos/third_party/qemu

blob: 457cffea90aff462debe7e36a6fb9e46d9f8a6a6 [file] [log] [blame]

Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright (c) 2018 Linaro Limited
				3	#
				4	# This library is free software; you can redistribute it and/or
				5	# modify it under the terms of the GNU Lesser General Public
				6	# License as published by the Free Software Foundation; either
				7	# version 2 of the License, or (at your option) any later version.
				8	#
				9	# This library is distributed in the hope that it will be useful,
				10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				12	# Lesser General Public License for more details.
				13	#
				14	# You should have received a copy of the GNU Lesser General Public
				15	# License along with this library; if not, see <http://www.gnu.org/licenses/>.
				16	#
				17
				18	#
				19	# Generate a decoding tree from a specification file.
				20	#
				21	# The tree is built from instruction "patterns". A pattern may represent
				22	# a single architectural instruction or a group of same, depending on what
				23	# is convenient for further processing.
				24	#
				25	# Each pattern has "fixedbits" & "fixedmask", the combination of which
				26	# describes the condition under which the pattern is matched:
				27	#
				28	# (insn & fixedmask) == fixedbits
				29	#
				30	# Each pattern may have "fields", which are extracted from the insn and
				31	# passed along to the translator. Examples of such are registers,
				32	# immediates, and sub-opcodes.
				33	#
				34	# In support of patterns, one may declare fields, argument sets, and
				35	# formats, each of which may be re-used to simplify further definitions.
				36	#
				37	# *** Field syntax:
				38	#
				39	# field_def := '%' identifier ( unnamed_field )+ ( !function=identifier )?
				40	# unnamed_field := number ':' ( 's' )? number
				41	#
				42	# For unnamed_field, the first number is the least-significant bit position of
				43	# the field and the second number is the length of the field. If the 's' is
				44	# present, the field is considered signed. If multiple unnamed_fields are
				45	# present, they are concatenated. In this way one can define disjoint fields.
				46	#
				47	# If !function is specified, the concatenated result is passed through the
				48	# named function, taking and returning an integral value.
				49	#
				50	# FIXME: the fields of the structure into which this result will be stored
				51	# is restricted to "int". Which means that we cannot expand 64-bit items.
				52	#
				53	# Field examples:
				54	#
				55	# %disp 0:s16 -- sextract(i, 0, 16)
				56	# %imm9 16:6 10:3 -- extract(i, 16, 6) << 3 \| extract(i, 10, 3)
				57	# %disp12 0:s1 1:1 2:10 -- sextract(i, 0, 1) << 11
				58	# \| extract(i, 1, 1) << 10
				59	# \| extract(i, 2, 10)
				60	# %shimm8 5:s8 13:1 !function=expand_shimm8
				61	# -- expand_shimm8(sextract(i, 5, 8) << 1
				62	# \| extract(i, 13, 1))
				63	#
				64	# *** Argument set syntax:
				65	#
				66	# args_def := '&' identifier ( args_elt )+
				67	# args_elt := identifier
				68	#
				69	# Each args_elt defines an argument within the argument set.
				70	# Each argument set will be rendered as a C structure "arg_$name"
				71	# with each of the fields being one of the member arguments.
				72	#
				73	# Argument set examples:
				74	#
				75	# &reg3 ra rb rc
				76	# &loadstore reg base offset
				77	#
				78	# *** Format syntax:
				79	#
				80	# fmt_def := '@' identifier ( fmt_elt )+
				81	# fmt_elt := fixedbit_elt \| field_elt \| field_ref \| args_ref
				82	# fixedbit_elt := [01.-]+
				83	# field_elt := identifier ':' 's'? number
				84	# field_ref := '%' identifier \| identifier '=' '%' identifier
				85	# args_ref := '&' identifier
				86	#
				87	# Defining a format is a handy way to avoid replicating groups of fields
				88	# across many instruction patterns.
				89	#
				90	# A fixedbit_elt describes a contiguous sequence of bits that must
				91	# be 1, 0, [.-] for don't care. The difference between '.' and '-'
				92	# is that '.' means that the bit will be covered with a field or a
				93	# final [01] from the pattern, and '-' means that the bit is really
				94	# ignored by the cpu and will not be specified.
				95	#
				96	# A field_elt describes a simple field only given a width; the position of
				97	# the field is implied by its position with respect to other fixedbit_elt
				98	# and field_elt.
				99	#
				100	# If any fixedbit_elt or field_elt appear then all bits must be defined.
				101	# Padding with a fixedbit_elt of all '.' is an easy way to accomplish that.
				102	#
				103	# A field_ref incorporates a field by reference. This is the only way to
				104	# add a complex field to a format. A field may be renamed in the process
				105	# via assignment to another identifier. This is intended to allow the
				106	# same argument set be used with disjoint named fields.
				107	#
				108	# A single args_ref may specify an argument set to use for the format.
				109	# The set of fields in the format must be a subset of the arguments in
				110	# the argument set. If an argument set is not specified, one will be
				111	# inferred from the set of fields.
				112	#
				113	# It is recommended, but not required, that all field_ref and args_ref
				114	# appear at the end of the line, not interleaving with fixedbit_elt or
				115	# field_elt.
				116	#
				117	# Format examples:
				118	#
				119	# @opr ...... ra:5 rb:5 ... 0 ....... rc:5
				120	# @opi ...... ra:5 lit:8 1 ....... rc:5
				121	#
				122	# *** Pattern syntax:
				123	#
				124	# pat_def := identifier ( pat_elt )+
				125	# pat_elt := fixedbit_elt \| field_elt \| field_ref
				126	# \| args_ref \| fmt_ref \| const_elt
				127	# fmt_ref := '@' identifier
				128	# const_elt := identifier '=' number
				129	#
				130	# The fixedbit_elt and field_elt specifiers are unchanged from formats.
				131	# A pattern that does not specify a named format will have one inferred
				132	# from a referenced argument set (if present) and the set of fields.
				133	#
				134	# A const_elt allows an argument to be set to a constant value. This may
				135	# come in handy when fields overlap between patterns and one has to
				136	# include the values in the fixedbit_elt instead.
				137	#
				138	# The decoder will call a translator function for each pattern matched.
				139	#
				140	# Pattern examples:
				141	#
				142	# addl_r 010000 ..... ..... .... 0000000 ..... @opr
				143	# addl_i 010000 ..... ..... .... 0000000 ..... @opi
				144	#
				145	# which will, in part, invoke
				146	#
				147	# trans_addl_r(ctx, &arg_opr, insn)
				148	# and
				149	# trans_addl_i(ctx, &arg_opi, insn)
				150	#
				151
Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	152	import os
				153	import re
				154	import sys
				155	import getopt
Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	156
# Width of one instruction word in bits and the mask covering those bits.
# main() replaces both when a 16-bit instruction width is requested.
insnwidth = 32
insnmask = 0xffffffff
# Parsed definitions: fields, argument sets and formats are keyed by name;
# patterns are kept in a list in the order they were parsed.
fields = {}
arguments = {}
formats = {}
patterns = []

# Prefix and C storage-class text used for the translator declarations.
translate_prefix = 'trans'
translate_scope = 'static '
# Input path (used in diagnostics), output path, and the open output file.
input_file = ''
output_file = None
output_fd = None
# C type used for the "insn" parameter of the generated functions.
insntype = 'uint32_t'

# Regular expression fragment matching one identifier.
re_ident = '[a-zA-Z][a-zA-Z0-9_]*'
				172
				173
				174	def error(lineno, *args):
				175	"""Print an error message from file:line and args and exit."""
				176	global output_file
				177	global output_fd
				178
				179	if lineno:
				180	r = '{0}:{1}: error:'.format(input_file, lineno)
				181	elif input_file:
				182	r = '{0}: error:'.format(input_file)
				183	else:
				184	r = 'error:'
				185	for a in args:
				186	r += ' ' + str(a)
				187	r += '\n'
				188	sys.stderr.write(r)
				189	if output_file and output_fd:
				190	output_fd.close()
				191	os.remove(output_file)
				192	exit(1)
				193
				194
				195	def output(*args):
				196	global output_fd
				197	for a in args:
				198	output_fd.write(a)
				199
				200
				201	if sys.version_info >= (3, 0):
				202	re_fullmatch = re.fullmatch
				203	else:
				204	def re_fullmatch(pat, str):
				205	return re.match('^' + pat + '$', str)
				206
				207
				208	def output_autogen():
				209	output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
				210
				211
				212	def str_indent(c):
				213	"""Return a string with C spaces"""
				214	return ' ' * c
				215
				216
				217	def str_fields(fields):
				218	"""Return a string uniquely identifing FIELDS"""
				219	r = ''
				220	for n in sorted(fields.keys()):
				221	r += '_' + n
				222	return r[1:]
				223
				224
				225	def str_match_bits(bits, mask):
				226	"""Return a string pretty-printing BITS/MASK"""
				227	global insnwidth
				228
				229	i = 1 << (insnwidth - 1)
				230	space = 0x01010100
				231	r = ''
				232	while i != 0:
				233	if i & mask:
				234	if i & bits:
				235	r += '1'
				236	else:
				237	r += '0'
				238	else:
				239	r += '.'
				240	if i & space:
				241	r += ' '
				242	i >>= 1
				243	return r
				244
				245
				246	def is_pow2(x):
				247	"""Return true iff X is equal to a power of 2."""
				248	return (x & (x - 1)) == 0
				249
				250
				251	def ctz(x):
				252	"""Return the number of times 2 factors into X."""
				253	r = 0
				254	while ((x >> r) & 1) == 0:
				255	r += 1
				256	return r
				257
				258
				259	def is_contiguous(bits):
				260	shift = ctz(bits)
				261	if is_pow2((bits >> shift) + 1):
				262	return shift
				263	else:
				264	return -1
				265
				266
				267	def eq_fields_for_args(flds_a, flds_b):
				268	if len(flds_a) != len(flds_b):
				269	return False
				270	for k, a in flds_a.items():
				271	if k not in flds_b:
				272	return False
				273	return True
				274
				275
				276	def eq_fields_for_fmts(flds_a, flds_b):
				277	if len(flds_a) != len(flds_b):
				278	return False
				279	for k, a in flds_a.items():
				280	if k not in flds_b:
				281	return False
				282	b = flds_b[k]
				283	if a.__class__ != b.__class__ or a != b:
				284	return False
				285	return True
				286
				287
				288	class Field:
				289	"""Class representing a simple instruction field"""
				290	def __init__(self, sign, pos, len):
				291	self.sign = sign
				292	self.pos = pos
				293	self.len = len
				294	self.mask = ((1 << len) - 1) << pos
				295
				296	def __str__(self):
				297	if self.sign:
				298	s = 's'
				299	else:
				300	s = ''
				301	return str(pos) + ':' + s + str(len)
				302
				303	def str_extract(self):
				304	if self.sign:
				305	extr = 'sextract32'
				306	else:
				307	extr = 'extract32'
				308	return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
				309
				310	def __eq__(self, other):
				311	return self.sign == other.sign and self.sign == other.sign
				312
				313	def __ne__(self, other):
				314	return not self.__eq__(other)
				315	# end Field
				316
				317
				318	class MultiField:
				319	"""Class representing a compound instruction field"""
				320	def __init__(self, subs, mask):
				321	self.subs = subs
				322	self.sign = subs[0].sign
				323	self.mask = mask
				324
				325	def __str__(self):
				326	return str(self.subs)
				327
				328	def str_extract(self):
				329	ret = '0'
				330	pos = 0
				331	for f in reversed(self.subs):
				332	if pos == 0:
				333	ret = f.str_extract()
				334	else:
				335	ret = 'deposit32({0}, {1}, {2}, {3})' \
				336	.format(ret, pos, 32 - pos, f.str_extract())
				337	pos += f.len
				338	return ret
				339
				340	def __ne__(self, other):
				341	if len(self.subs) != len(other.subs):
				342	return True
				343	for a, b in zip(self.subs, other.subs):
				344	if a.__class__ != b.__class__ or a != b:
				345	return True
				346	return False
				347
				348	def __eq__(self, other):
				349	return not self.__ne__(other)
				350	# end MultiField
				351
				352
				353	class ConstField:
				354	"""Class representing an argument field with constant value"""
				355	def __init__(self, value):
				356	self.value = value
				357	self.mask = 0
				358	self.sign = value < 0
				359
				360	def __str__(self):
				361	return str(self.value)
				362
				363	def str_extract(self):
				364	return str(self.value)
				365
				366	def __cmp__(self, other):
				367	return self.value - other.value
				368	# end ConstField
				369
				370
				371	class FunctionField:
				372	"""Class representing a field passed through an expander"""
				373	def __init__(self, func, base):
				374	self.mask = base.mask
				375	self.sign = base.sign
				376	self.base = base
				377	self.func = func
				378
				379	def __str__(self):
				380	return self.func + '(' + str(self.base) + ')'
				381
				382	def str_extract(self):
				383	return self.func + '(' + self.base.str_extract() + ')'
				384
				385	def __eq__(self, other):
				386	return self.func == other.func and self.base == other.base
				387
				388	def __ne__(self, other):
				389	return not self.__eq__(other)
				390	# end FunctionField
				391
				392
				393	class Arguments:
				394	"""Class representing the extracted fields of a format"""
				395	def __init__(self, nm, flds):
				396	self.name = nm
				397	self.fields = sorted(flds)
				398
				399	def __str__(self):
				400	return self.name + ' ' + str(self.fields)
				401
				402	def struct_name(self):
				403	return 'arg_' + self.name
				404
				405	def output_def(self):
				406	output('typedef struct {\n')
				407	for n in self.fields:
				408	output(' int ', n, ';\n')
				409	output('} ', self.struct_name(), ';\n\n')
				410	# end Arguments
				411
				412
				413	class General:
				414	"""Common code between instruction formats and instruction patterns"""
				415	def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds):
				416	self.name = name
				417	self.lineno = lineno
				418	self.base = base
				419	self.fixedbits = fixb
				420	self.fixedmask = fixm
				421	self.undefmask = udfm
				422	self.fieldmask = fldm
				423	self.fields = flds
				424
				425	def __str__(self):
				426	r = self.name
				427	if self.base:
				428	r = r + ' ' + self.base.name
				429	else:
				430	r = r + ' ' + str(self.fields)
				431	r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
				432	return r
				433
				434	def str1(self, i):
				435	return str_indent(i) + self.__str__()
				436	# end General
				437
				438
				439	class Format(General):
				440	"""Class representing an instruction format"""
				441
				442	def extract_name(self):
				443	return 'extract_' + self.name
				444
				445	def output_extract(self):
				446	output('static void ', self.extract_name(), '(',
				447	self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
				448	for n, f in self.fields.items():
				449	output(' a->', n, ' = ', f.str_extract(), ';\n')
				450	output('}\n\n')
				451	# end Format
				452
				453
				454	class Pattern(General):
				455	"""Class representing an instruction pattern"""
				456
				457	def output_decl(self):
				458	global translate_scope
				459	global translate_prefix
				460	output('typedef ', self.base.base.struct_name(),
				461	' arg_', self.name, ';\n')
Richard Henderson	7680559	2018-03-02 10:45:35 +0000	[diff] [blame]	462	output(translate_scope, 'bool ', translate_prefix, '_', self.name,
Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	463	'(DisasContext *ctx, arg_', self.name,
				464	' *a, ', insntype, ' insn);\n')
				465
				466	def output_code(self, i, extracted, outerbits, outermask):
				467	global translate_prefix
				468	ind = str_indent(i)
				469	arg = self.base.base.name
				470	output(ind, '/* line ', str(self.lineno), ' */\n')
				471	if not extracted:
				472	output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n')
				473	for n, f in self.fields.items():
				474	output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
Richard Henderson	7680559	2018-03-02 10:45:35 +0000	[diff] [blame]	475	output(ind, 'return ', translate_prefix, '_', self.name,
Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	476	'(ctx, &u.f_', arg, ', insn);\n')
Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	477	# end Pattern
				478
				479
				480	def parse_field(lineno, name, toks):
				481	"""Parse one instruction field from TOKS at LINENO"""
				482	global fields
				483	global re_ident
				484	global insnwidth
				485
				486	# A "simple" field will have only one entry;
				487	# a "multifield" will have several.
				488	subs = []
				489	width = 0
				490	func = None
				491	for t in toks:
				492	if re_fullmatch('!function=' + re_ident, t):
				493	if func:
				494	error(lineno, 'duplicate function')
				495	func = t.split('=')
				496	func = func[1]
				497	continue
				498
				499	if re_fullmatch('[0-9]+:s[0-9]+', t):
				500	# Signed field extract
				501	subtoks = t.split(':s')
				502	sign = True
				503	elif re_fullmatch('[0-9]+:[0-9]+', t):
				504	# Unsigned field extract
				505	subtoks = t.split(':')
				506	sign = False
				507	else:
				508	error(lineno, 'invalid field token "{0}"'.format(t))
				509	po = int(subtoks[0])
				510	le = int(subtoks[1])
				511	if po + le > insnwidth:
				512	error(lineno, 'field {0} too large'.format(t))
				513	f = Field(sign, po, le)
				514	subs.append(f)
				515	width += le
				516
				517	if width > insnwidth:
				518	error(lineno, 'field too large')
				519	if len(subs) == 1:
				520	f = subs[0]
				521	else:
				522	mask = 0
				523	for s in subs:
				524	if mask & s.mask:
				525	error(lineno, 'field components overlap')
				526	mask \|= s.mask
				527	f = MultiField(subs, mask)
				528	if func:
				529	f = FunctionField(func, f)
				530
				531	if name in fields:
				532	error(lineno, 'duplicate field', name)
				533	fields[name] = f
				534	# end parse_field
				535
				536
				537	def parse_arguments(lineno, name, toks):
				538	"""Parse one argument set from TOKS at LINENO"""
				539	global arguments
				540	global re_ident
				541
				542	flds = []
				543	for t in toks:
				544	if not re_fullmatch(re_ident, t):
				545	error(lineno, 'invalid argument set token "{0}"'.format(t))
				546	if t in flds:
				547	error(lineno, 'duplicate argument "{0}"'.format(t))
				548	flds.append(t)
				549
				550	if name in arguments:
				551	error(lineno, 'duplicate argument set', name)
				552	arguments[name] = Arguments(name, flds)
				553	# end parse_arguments
				554
				555
				556	def lookup_field(lineno, name):
				557	global fields
				558	if name in fields:
				559	return fields[name]
				560	error(lineno, 'undefined field', name)
				561
				562
				563	def add_field(lineno, flds, new_name, f):
				564	if new_name in flds:
				565	error(lineno, 'duplicate field', new_name)
				566	flds[new_name] = f
				567	return flds
				568
				569
				570	def add_field_byname(lineno, flds, new_name, old_name):
				571	return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
				572
				573
				574	def infer_argument_set(flds):
				575	global arguments
				576
				577	for arg in arguments.values():
				578	if eq_fields_for_args(flds, arg.fields):
				579	return arg
				580
				581	name = str(len(arguments))
				582	arg = Arguments(name, flds.keys())
				583	arguments[name] = arg
				584	return arg
				585
				586
				587	def infer_format(arg, fieldmask, flds):
				588	global arguments
				589	global formats
				590
				591	const_flds = {}
				592	var_flds = {}
				593	for n, c in flds.items():
				594	if c is ConstField:
				595	const_flds[n] = c
				596	else:
				597	var_flds[n] = c
				598
				599	# Look for an existing format with the same argument set and fields
				600	for fmt in formats.values():
				601	if arg and fmt.base != arg:
				602	continue
				603	if fieldmask != fmt.fieldmask:
				604	continue
				605	if not eq_fields_for_fmts(flds, fmt.fields):
				606	continue
				607	return (fmt, const_flds)
				608
				609	name = 'Fmt_' + str(len(formats))
				610	if not arg:
				611	arg = infer_argument_set(flds)
				612
				613	fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds)
				614	formats[name] = fmt
				615
				616	return (fmt, const_flds)
				617	# end infer_format
				618
				619
def parse_generic(lineno, is_format, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    IS_FORMAT selects between a format definition (stored in the global
    table of formats under NAME) and a pattern (appended to the global
    list of patterns).  Every problem found is reported through error().
    """
    global fields
    global arguments
    global formats
    global patterns
    global re_ident
    global insnwidth
    global insnmask

    # Masks are accumulated left to right: each fixedbit or field token
    # shifts what has been gathered so far and appends its own bits.
    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if t[0] == '&':
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if t[0] == '@':
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if t[0] == '%':
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re_fullmatch(re_ident + '=%' + re_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re_fullmatch(re_ident + '=[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re_fullmatch('[01.-]+', t):
            shift = len(t)
            # fms: a 1 wherever the token fixes the bit (either value).
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            # fbs: the value required of the fixed bits.
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            # ubm: a 1 wherever the token marks the bit as ignored ('-').
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re_fullmatch(re_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            # The field sits directly below the bits consumed so far.
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, 'invalid token "{0}"'.format(t))
        width += shift

    # We should have filled in all of the bits of the instruction.
    # (A format that names no bits at all is the one exception.)
    if not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, 'field {0} not in argument set {1}'
                                  .format(f, arg.name))
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            # Merge the format's masks into those of the pattern.
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            # infer_format returns the fields the pattern itself must set.
            (fmt, flds) = infer_format(arg, fieldmask, flds)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, 'field {0} not in argument set {1}'
                              .format(f, arg.name))
            if f in fmt.fields.keys():
                error(lineno, 'field {0} set by format and pattern'.format(f))
        # Every member of the argument set needs a value from somewhere.
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, 'field {0} not initialized'.format(f))
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds)
        patterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, fixedmask))
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, undefmask))
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fixedmask, undefmask))
    if not is_format:
        # A pattern must account for every bit of the instruction.
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified (0x{0:08x})'
                          .format(allbits ^ insnmask))
# end parse_generic
				787
				788
				789	def parse_file(f):
				790	"""Parse all of the patterns within a file"""
				791
				792	# Read all of the lines of the file. Concatenate lines
				793	# ending in backslash; discard empty lines and comments.
				794	toks = []
				795	lineno = 0
				796	for line in f:
				797	lineno += 1
				798
				799	# Discard comments
				800	end = line.find('#')
				801	if end >= 0:
				802	line = line[:end]
				803
				804	t = line.split()
				805	if len(toks) != 0:
				806	# Next line after continuation
				807	toks.extend(t)
				808	elif len(t) == 0:
				809	# Empty line
				810	continue
				811	else:
				812	toks = t
				813
				814	# Continuation?
				815	if toks[-1] == '\\':
				816	toks.pop()
				817	continue
				818
				819	if len(toks) < 2:
				820	error(lineno, 'short line')
				821
				822	name = toks[0]
				823	del toks[0]
				824
				825	# Determine the type of object needing to be parsed.
				826	if name[0] == '%':
				827	parse_field(lineno, name[1:], toks)
				828	elif name[0] == '&':
				829	parse_arguments(lineno, name[1:], toks)
				830	elif name[0] == '@':
				831	parse_generic(lineno, True, name[1:], toks)
				832	else:
				833	parse_generic(lineno, False, name, toks)
				834	toks = []
				835	# end parse_file
				836
				837
				838	class Tree:
				839	"""Class representing a node in a decode tree"""
				840
				841	def __init__(self, fm, tm):
				842	self.fixedmask = fm
				843	self.thismask = tm
				844	self.subs = []
				845	self.base = None
				846
				847	def str1(self, i):
				848	ind = str_indent(i)
				849	r = '{0}{1:08x}'.format(ind, self.fixedmask)
				850	if self.format:
				851	r += ' ' + self.format.name
				852	r += ' [\n'
				853	for (b, s) in self.subs:
				854	r += '{0} {1:08x}:\n'.format(ind, b)
				855	r += s.str1(i + 4) + '\n'
				856	r += ind + ']'
				857	return r
				858
				859	def __str__(self):
				860	return self.str1(0)
				861
				862	def output_code(self, i, extracted, outerbits, outermask):
				863	ind = str_indent(i)
				864
				865	# If we identified all nodes below have the same format,
				866	# extract the fields now.
				867	if not extracted and self.base:
				868	output(ind, self.base.extract_name(),
				869	'(&u.f_', self.base.base.name, ', insn);\n')
				870	extracted = True
				871
				872	# Attempt to aid the compiler in producing compact switch statements.
				873	# If the bits in the mask are contiguous, extract them.
				874	sh = is_contiguous(self.thismask)
				875	if sh > 0:
				876	# Propagate SH down into the local functions.
				877	def str_switch(b, sh=sh):
				878	return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
				879
				880	def str_case(b, sh=sh):
				881	return '0x{0:x}'.format(b >> sh)
				882	else:
				883	def str_switch(b):
				884	return 'insn & 0x{0:08x}'.format(b)
				885
				886	def str_case(b):
				887	return '0x{0:08x}'.format(b)
				888
				889	output(ind, 'switch (', str_switch(self.thismask), ') {\n')
				890	for b, s in sorted(self.subs):
				891	assert (self.thismask & ~s.fixedmask) == 0
				892	innermask = outermask \| self.thismask
				893	innerbits = outerbits \| b
				894	output(ind, 'case ', str_case(b), ':\n')
				895	output(ind, ' /* ',
				896	str_match_bits(innerbits, innermask), ' */\n')
				897	s.output_code(i + 4, extracted, innerbits, innermask)
				898	output(ind, '}\n')
				899	output(ind, 'return false;\n')
				900	# end Tree
				901
				902
				903	def build_tree(pats, outerbits, outermask):
				904	# Find the intersection of all remaining fixedmask.
				905	innermask = ~outermask
				906	for i in pats:
				907	innermask &= i.fixedmask
				908
				909	if innermask == 0:
				910	pnames = []
				911	for p in pats:
				912	pnames.append(p.name + ':' + str(p.lineno))
				913	error(pats[0].lineno, 'overlapping patterns:', pnames)
				914
				915	fullmask = outermask \| innermask
				916
				917	# Sort each element of pats into the bin selected by the mask.
				918	bins = {}
				919	for i in pats:
				920	fb = i.fixedbits & innermask
				921	if fb in bins:
				922	bins[fb].append(i)
				923	else:
				924	bins[fb] = [i]
				925
				926	# We must recurse if any bin has more than one element or if
				927	# the single element in the bin has not been fully matched.
				928	t = Tree(fullmask, innermask)
				929
				930	for b, l in bins.items():
				931	s = l[0]
				932	if len(l) > 1 or s.fixedmask & ~fullmask != 0:
				933	s = build_tree(l, b \| outerbits, fullmask)
				934	t.subs.append((b, s))
				935
				936	return t
				937	# end build_tree
				938
				939
				940	def prop_format(tree):
				941	"""Propagate Format objects into the decode tree"""
				942
				943	# Depth first search.
				944	for (b, s) in tree.subs:
				945	if isinstance(s, Tree):
				946	prop_format(s)
				947
				948	# If all entries in SUBS have the same format, then
				949	# propagate that into the tree.
				950	f = None
				951	for (b, s) in tree.subs:
				952	if f is None:
				953	f = s.base
				954	if f is None:
				955	return
				956	if f is not s.base:
				957	return
				958	tree.base = f
				959	# end prop_format
				960
				961
				962	def main():
				963	global arguments
				964	global formats
				965	global patterns
				966	global translate_scope
				967	global translate_prefix
				968	global output_fd
				969	global output_file
				970	global input_file
				971	global insnwidth
				972	global insntype
Bastian Koppelmann	83d7c40	2018-03-19 12:58:46 +0100	[diff] [blame]	973	global insnmask
Richard Henderson	568ae7e	2017-12-07 12:44:09 -0800	[diff] [blame]	974
				975	decode_function = 'decode'
				976	decode_scope = 'static '
				977
				978	long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=']
				979	try:
				980	(opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts)
				981	except getopt.GetoptError as err:
				982	error(0, err)
				983	for o, a in opts:
				984	if o in ('-o', '--output'):
				985	output_file = a
				986	elif o == '--decode':
				987	decode_function = a
				988	decode_scope = ''
				989	elif o == '--translate':
				990	translate_prefix = a
				991	translate_scope = ''
				992	elif o in ('-w', '--insnwidth'):
				993	insnwidth = int(a)
				994	if insnwidth == 16:
				995	insntype = 'uint16_t'
				996	insnmask = 0xffff
				997	elif insnwidth != 32:
				998	error(0, 'cannot handle insns of width', insnwidth)
				999	else:
				1000	assert False, 'unhandled option'
				1001
				1002	if len(args) < 1:
				1003	error(0, 'missing input file')
				1004	input_file = args[0]
				1005	f = open(input_file, 'r')
				1006	parse_file(f)
				1007	f.close()
				1008
				1009	t = build_tree(patterns, 0, 0)
				1010	prop_format(t)
				1011
				1012	if output_file:
				1013	output_fd = open(output_file, 'w')
				1014	else:
				1015	output_fd = sys.stdout
				1016
				1017	output_autogen()
				1018	for n in sorted(arguments.keys()):
				1019	f = arguments[n]
				1020	f.output_def()
				1021
				1022	# A single translate function can be invoked for different patterns.
				1023	# Make sure that the argument sets are the same, and declare the
				1024	# function only once.
				1025	out_pats = {}
				1026	for i in patterns:
				1027	if i.name in out_pats:
				1028	p = out_pats[i.name]
				1029	if i.base.base != p.base.base:
				1030	error(0, i.name, ' has conflicting argument sets')
				1031	else:
				1032	i.output_decl()
				1033	out_pats[i.name] = i
				1034	output('\n')
				1035
				1036	for n in sorted(formats.keys()):
				1037	f = formats[n]
				1038	f.output_extract()
				1039
				1040	output(decode_scope, 'bool ', decode_function,
				1041	'(DisasContext *ctx, ', insntype, ' insn)\n{\n')
				1042
				1043	i4 = str_indent(4)
				1044	output(i4, 'union {\n')
				1045	for n in sorted(arguments.keys()):
				1046	f = arguments[n]
				1047	output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
				1048	output(i4, '} u;\n\n')
				1049
				1050	t.output_code(4, False, 0, 0)
				1051
				1052	output('}\n')
				1053
				1054	if output_file:
				1055	output_fd.close()
				1056	# end main
				1057
				1058
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()