Blame - tools/parser.py - android.googlesource.com/platform/external/minijail

blob: c04944fecf9fd97e56bfbb8bb6e3481b62753338 [file] [log] [blame]

Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	1	#!/usr/bin/env python3
				2	# -- coding: utf-8 --
				3	#
				4	# Copyright (C) 2018 The Android Open Source Project
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License");
				7	# you may not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS,
				14	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
				17	"""A parser for the Minijail policy file."""
				18
				19	from __future__ import absolute_import
				20	from __future__ import division
				21	from __future__ import print_function
				22
				23	import collections
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	24	import itertools
				25	import os.path
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	26	import re
				27
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame]	28	import bpf
				29
# A lexical token together with where it was found. ParserState.tokenize()
# stores the 1-based line number in `line` and the 0-based offset of the token
# within the line in `column`.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# Order matters: the alternatives are tried left to right, so longer or more
# specific patterns must come before the shorter ones they overlap with (for
# example '||' before '|', and 'kill-process' before 'kill').
#
# Keywords are anchored with \b so that identifiers which merely start with a
# keyword (inotify_init, killpg, return_value, ...) are lexed as a single
# IDENTIFIER instead of being split into a keyword plus a leftover.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
# One named group per token type; match.lastgroup then names the token type.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
				63
				64
				65	class ParseException(Exception):
				66	"""An exception that is raised when parsing fails."""
				67
				68	# pylint: disable=too-many-arguments
				69	def __init__(self, message, filename, line, line_number=1, token=None):
				70	if token:
				71	column = token.column
				72	length = len(token.value)
				73	else:
				74	column = len(line)
				75	length = 1
				76
				77	message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
				78	message)
				79	message += '\n %s' % line
				80	message += '\n %s%s' % (' ' * column, '^' * length)
				81	super().__init__(message)
				82
				83
				84	class ParserState:
				85	"""Stores the state of the Parser to provide better diagnostics."""
				86
				87	def __init__(self, filename):
				88	self._filename = filename
				89	self._line = ''
				90	self._line_number = 0
				91
				92	@property
				93	def filename(self):
				94	"""Return the name of the file being processed."""
				95	return self._filename
				96
				97	@property
				98	def line(self):
				99	"""Return the current line being processed."""
				100	return self._line
				101
				102	@property
				103	def line_number(self):
				104	"""Return the current line number being processed."""
				105	return self._line_number
				106
				107	def set_line(self, line):
				108	"""Update the current line being processed."""
				109	self._line = line
				110	self._line_number += 1
				111
				112	def error(self, message, token=None):
				113	"""Raise a ParserException with the provided message."""
				114	raise ParseException(message, self.filename, self.line,
				115	self.line_number, token)
				116
				117	def tokenize(self):
				118	"""Return a list of tokens for the current line."""
				119	tokens = []
				120
				121	last_end = 0
				122	for token in _TOKEN_RE.finditer(self.line):
				123	if token.start() != last_end:
				124	self.error(
				125	'invalid token',
				126	token=Token('INVALID', self.line[last_end:token.start()],
				127	self.filename, self.line_number, last_end))
				128	last_end = token.end()
				129
				130	# Omit whitespace and comments now to avoid sprinkling this logic
				131	# elsewhere.
				132	if token.lastgroup in ('WHITESPACE', 'COMMENT'):
				133	continue
				134	tokens.append(
				135	Token(token.lastgroup, token.group(), self.filename,
				136	self.line_number, token.start()))
				137	if last_end != len(self.line):
				138	self.error(
				139	'invalid token',
				140	token=Token('INVALID', self.line[last_end:], self.filename,
				141	self.line_number, last_end))
				142	return tokens
				143
				144
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))
"""One comparison (argument index, operator, value) in a filter expression."""

Filter = collections.namedtuple('Filter', ('expression', 'action'))
"""A parsed DNF filter expression paired with the action it triggers.

The expression is in Disjunctive Normal Form and is therefore stored as two
levels of lists: the outer list holds the disjunctions and each inner list
holds the conjunctions. The items of the inner lists are Atoms.
"""

Syscall = collections.namedtuple('Syscall', ('name', 'number'))
"""A system call, identified by name and number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters'))
"""What parsing one filter statement produces.

A statement names a list of syscalls plus a list of filters; the filters are
evaluated sequentially whenever any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""All the filters that apply to one particular syscall.

Maps a single syscall to the list of filters evaluated for it, in order. The
final filter in the list is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""What parsing a whole minijail .policy file produces."""
				178
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	179
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	180	# pylint: disable=too-few-public-methods
				181	class PolicyParser:
				182	"""A parser for the Minijail seccomp policy file format."""
				183
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	184	def __init__(self, arch, *, kill_action, include_depth_limit=10):
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	185	self._parser_states = [ParserState("<memory>")]
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame]	186	self._kill_action = kill_action
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	187	self._include_depth_limit = include_depth_limit
				188	self._default_action = self._kill_action
Luis Hector Chavez	b440886	2018-12-05 16:54:16 -0800	[diff] [blame^]	189	self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	190	self._arch = arch
				191
				192	@property
				193	def _parser_state(self):
				194	return self._parser_states[-1]
				195
    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
				199	def _parse_single_constant(self, token):
				200	if token.type == 'IDENTIFIER':
				201	if token.value not in self._arch.constants:
				202	self._parser_state.error('invalid constant', token=token)
				203	single_constant = self._arch.constants[token.value]
				204	elif token.type == 'NUMERIC_CONSTANT':
				205	try:
				206	single_constant = int(token.value, base=0)
				207	except ValueError:
				208	self._parser_state.error('invalid constant', token=token)
				209	else:
				210	self._parser_state.error('invalid constant', token=token)
				211	if single_constant > self._arch.max_unsigned:
				212	self._parser_state.error('unsigned overflow', token=token)
				213	elif single_constant < self._arch.min_signed:
				214	self._parser_state.error('signed underflow', token=token)
				215	elif single_constant < 0:
				216	# This converts the constant to an unsigned representation of the
				217	# same value, since BPF only uses unsigned values.
				218	single_constant = self._arch.truncate_word(single_constant)
				219	return single_constant
				220
    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
				224	def _parse_constant(self, tokens):
				225	negate = False
				226	if tokens[0].type == 'BITWISE_COMPLEMENT':
				227	negate = True
				228	tokens.pop(0)
				229	if not tokens:
				230	self._parser_state.error('empty complement')
				231	if tokens[0].type == 'BITWISE_COMPLEMENT':
				232	self._parser_state.error(
				233	'invalid double complement', token=tokens[0])
				234	if tokens[0].type == 'LPAREN':
				235	last_open_paren = tokens.pop(0)
				236	single_value = self.parse_value(tokens)
				237	if not tokens or tokens[0].type != 'RPAREN':
				238	self._parser_state.error(
				239	'unclosed parenthesis', token=last_open_paren)
				240	else:
				241	single_value = self._parse_single_constant(tokens[0])
				242	tokens.pop(0)
				243	if negate:
				244	single_value = self._arch.truncate_word(~single_value)
				245	return single_value
				246
    # value = constant , [ { '|' , constant } ]
    #       ;
				249	def parse_value(self, tokens):
				250	"""Parse constants separated bitwise OR operator \|.
				251
				252	Constants can be:
				253
				254	- A number that can be parsed with int(..., base=0)
				255	- A named constant expression.
				256	- A parenthesized, valid constant expression.
				257	- A valid constant expression prefixed with the unary bitwise
				258	complement operator ~.
				259	- A series of valid constant expressions separated by bitwise
				260	OR operator \|.
				261
				262	If there is an error parsing any of the constants, the whole process
				263	fails.
				264	"""
				265
				266	value = 0
				267	while tokens:
				268	value \|= self._parse_constant(tokens)
				269	if not tokens or tokens[0].type != 'BITWISE_OR':
				270	break
				271	tokens.pop(0)
				272	else:
				273	self._parser_state.error('empty constant')
				274	return value
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	275
				276	# atom = argument , op , value
				277	# ;
    def _parse_atom(self, tokens):
        """Parse one `argN <op> value` comparison into an Atom.

        Consumes the argument, the operator and the value from the front of
        `tokens`.

        Raises:
            ParseException: if the argument or operator is missing or of the
                wrong type, the value is invalid, or the argument index is
                not below bpf.MAX_SYSCALL_ARGUMENTS.
        """
        expected = (
            ('ARGUMENT', 'missing argument', 'invalid argument'),
            ('OP', 'missing operator', 'invalid operator'),
        )
        consumed = []
        for kind, missing_message, invalid_message in expected:
            if not tokens:
                self._parser_state.error(missing_message)
            candidate = tokens.pop(0)
            if candidate.type != kind:
                self._parser_state.error(invalid_message, token=candidate)
            consumed.append(candidate)
        argument, operator = consumed

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like 'arg<digits>'; strip the 'arg' prefix.
        argument_index = int(argument.value[3:])
        if argument_index < 0 or argument_index >= bpf.MAX_SYSCALL_ARGUMENTS:
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	296
				297	# clause = atom , [ { '&&' , atom } ]
				298	# ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by '&&' and return them as a list.

        Raises:
            ParseException: if there are no tokens to parse, or nothing
                follows a trailing '&&'.
        """
        conjunction = []
        while tokens:
            conjunction.append(self._parse_atom(tokens))
            if tokens and tokens[0].type == 'AND':
                tokens.pop(0)
                continue
            break
        else:
            self._parser_state.error('empty clause')
        return conjunction
				309
    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame]	312	def parse_argument_expression(self, tokens):
				313	"""Parse a argument expression in Disjunctive Normal Form.
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	314
				315	Since BPF disallows back jumps, we build the basic blocks in reverse
				316	order so that all the jump targets are known by the time we need to
				317	reference them.
				318	"""
				319
				320	clauses = []
				321	while tokens:
				322	clauses.append(self._parse_clause(tokens))
				323	if not tokens or tokens[0].type != 'OR':
				324	break
				325	tokens.pop(0)
				326	else:
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame]	327	self._parser_state.error('empty argument expression')
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	328	return clauses
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame]	329
    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
				339	def _parse_action(self, tokens):
				340	if not tokens:
				341	self._parser_state.error('missing action')
				342	action_token = tokens.pop(0)
				343	if action_token.type == 'ACTION':
				344	if action_token.value == 'allow':
				345	return bpf.Allow()
				346	if action_token.value == 'kill':
				347	return self._kill_action
				348	if action_token.value == 'kill-process':
				349	return bpf.KillProcess()
				350	if action_token.value == 'kill-thread':
				351	return bpf.KillThread()
				352	if action_token.value == 'trap':
				353	return bpf.Trap()
				354	if action_token.value == 'trace':
				355	return bpf.Trace()
				356	if action_token.value == 'log':
				357	return bpf.Log()
				358	elif action_token.type == 'NUMERIC_CONSTANT':
				359	constant = self._parse_single_constant(action_token)
				360	if constant == 1:
				361	return bpf.Allow()
				362	elif action_token.type == 'RETURN':
				363	if not tokens:
				364	self._parser_state.error('missing return value')
				365	return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
				366	return self._parser_state.error('invalid action', token=action_token)
				367
    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter: a bare action, or an expression with an action.

        Returns:
            A Filter. Its expression is None for a bare action. An argument
            expression with no '; action' suffix gets a bpf.Allow() action.

        Raises:
            ParseException: if there is nothing to parse or a part is invalid.
        """
        if not tokens:
            self._parser_state.error('missing filter')

        # Only argument expressions can start with an ARGUMENT token, so
        # anything else has to be an unconditional action.
        if tokens[0].type != 'ARGUMENT':
            return Filter(None, self._parse_action(tokens))

        expression = self.parse_argument_expression(tokens)
        if tokens and tokens[0].type == 'SEMICOLON':
            tokens.pop(0)
            action = self._parse_action(tokens)
        else:
            action = bpf.Allow()
        return Filter(expression, action)
				385
    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects.

        Accepts either one single-filter or a brace-enclosed, comma-separated
        list of them. The consumed tokens are removed from `tokens`.

        Raises:
            ParseException: if there is nothing to parse, a brace is left
                unclosed, or any single filter is invalid.
        """
        if not tokens:
            self._parser_state.error('missing filter')

        if tokens[0].type != 'LBRACE':
            return [self._parse_single_filter(tokens)]

        opening_brace = tokens.pop(0)
        parsed = []
        while tokens:
            parsed.append(self._parse_single_filter(tokens))
            if tokens and tokens[0].type == 'COMMA':
                tokens.pop(0)
                continue
            break
        if not (tokens and tokens[0].type == 'RBRACE'):
            self._parser_state.error('unclosed brace', token=opening_brace)
        tokens.pop(0)
        return parsed
Luis Hector Chavez	75406c2	2018-12-04 21:57:06 -0800	[diff] [blame]	407
Luis Hector Chavez	571e958	2018-12-05 09:44:00 -0800	[diff] [blame]	408	# key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
				409	# ;
				410	def _parse_key_value_pair(self, tokens):
				411	if not tokens:
				412	self._parser_state.error('missing key')
				413	key = tokens.pop(0)
				414	if key.type != 'IDENTIFIER':
				415	self._parser_state.error('invalid key', token=key)
				416	if not tokens:
				417	self._parser_state.error('missing equal')
				418	if tokens[0].type != 'EQUAL':
				419	self._parser_state.error('invalid equal', token=tokens[0])
				420	tokens.pop(0)
				421	value_list = []
				422	while tokens:
				423	value = tokens.pop(0)
				424	if value.type != 'IDENTIFIER':
				425	self._parser_state.error('invalid value', token=value)
				426	value_list.append(value.value)
				427	if not tokens or tokens[0].type != 'COMMA':
				428	break
				429	tokens.pop(0)
				430	else:
				431	self._parser_state.error('empty value')
				432	return (key.value, value_list)
				433
				434	# metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
				435	# ;
				436	def _parse_metadata(self, tokens):
				437	if not tokens:
				438	self._parser_state.error('missing opening bracket')
				439	opening_bracket = tokens.pop(0)
				440	if opening_bracket.type != 'LBRACKET':
				441	self._parser_state.error(
				442	'invalid opening bracket', token=opening_bracket)
				443	metadata = {}
				444	while tokens:
				445	first_token = tokens[0]
				446	key, value = self._parse_key_value_pair(tokens)
				447	if key in metadata:
				448	self._parser_state.error(
				449	'duplicate metadata key: "%s"' % key, token=first_token)
				450	metadata[key] = value
				451	if not tokens or tokens[0].type != 'SEMICOLON':
				452	break
				453	tokens.pop(0)
				454	if not tokens or tokens[0].type != 'RBRACKET':
				455	self._parser_state.error('unclosed bracket', token=opening_bracket)
				456	tokens.pop(0)
				457	return metadata
				458
    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | libc-function , [ metadata ]
    #                    ;
				462	def _parse_syscall_descriptor(self, tokens):
				463	if not tokens:
				464	self._parser_state.error('missing syscall descriptor')
				465	syscall_descriptor = tokens.pop(0)
				466	if syscall_descriptor.type != 'IDENTIFIER':
				467	self._parser_state.error(
				468	'invalid syscall descriptor', token=syscall_descriptor)
Luis Hector Chavez	571e958	2018-12-05 09:44:00 -0800	[diff] [blame]	469	# TODO(lhchavez): Support libc function names.
				470	if tokens and tokens[0].type == 'LBRACKET':
				471	metadata = self._parse_metadata(tokens)
				472	if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
				473	return ()
Luis Hector Chavez	75406c2	2018-12-04 21:57:06 -0800	[diff] [blame]	474	if syscall_descriptor.value not in self._arch.syscalls:
				475	self._parser_state.error(
				476	'nonexistent syscall', token=syscall_descriptor)
Luis Hector Chavez	75406c2	2018-12-04 21:57:06 -0800	[diff] [blame]	477	return (Syscall(syscall_descriptor.value,
				478	self._arch.syscalls[syscall_descriptor.value]), )
				479
    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        A statement is one syscall descriptor, or a brace-enclosed
        comma-separated list of them, followed by ':' and a filter.

        Returns:
            A ParsedFilterStatement, or None when every descriptor was
            skipped (none of them applies to this parser's architecture).

        Raises:
            ParseException: if the statement is empty or malformed.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')

        syscalls = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscalls.extend(self._parse_syscall_descriptor(tokens))
                if tokens and tokens[0].type == 'COMMA':
                    tokens.pop(0)
                    continue
                break
            if not (tokens and tokens[0].type == 'RBRACE'):
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscalls.extend(self._parse_syscall_descriptor(tokens))

        if not tokens:
            self._parser_state.error('missing colon')
        colon = tokens[0]
        if colon.type != 'COLON':
            self._parser_state.error('invalid colon', token=colon)
        tokens.pop(0)

        # The filter is parsed even when no syscall applies, so that syntax
        # errors are still reported and the tokens are still consumed.
        filters = self.parse_filter(tokens)
        if not syscalls:
            return None
        return ParsedFilterStatement(tuple(syscalls), filters)
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	511
				512	# include-statement = '@include' , posix-path
				513	# ;
				514	def _parse_include_statement(self, tokens):
				515	if not tokens:
				516	self._parser_state.error('empty filter statement')
				517	if tokens[0].type != 'INCLUDE':
				518	self._parser_state.error('invalid include', token=tokens[0])
				519	tokens.pop(0)
				520	if not tokens:
				521	self._parser_state.error('empty include path')
				522	include_path = tokens.pop(0)
				523	if include_path.type != 'PATH':
				524	self._parser_state.error(
				525	'invalid include path', token=include_path)
				526	if len(self._parser_states) == self._include_depth_limit:
				527	self._parser_state.error('@include statement nested too deep')
				528	include_filename = os.path.normpath(
				529	os.path.join(
				530	os.path.dirname(self._parser_state.filename),
				531	include_path.value))
				532	if not os.path.isfile(include_filename):
				533	self._parser_state.error(
				534	'Could not @include %s' % include_filename, token=include_path)
				535	return self._parse_policy_file(include_filename)
				536
Luis Hector Chavez	b440886	2018-12-05 16:54:16 -0800	[diff] [blame^]	537	def _parse_frequency_file(self, filename):
				538	self._parser_states.append(ParserState(filename))
				539	try:
				540	frequency_mapping = collections.defaultdict(int)
				541	with open(filename) as frequency_file:
				542	for line in frequency_file:
				543	self._parser_state.set_line(line.rstrip())
				544	tokens = self._parser_state.tokenize()
				545
				546	if not tokens:
				547	continue
				548
				549	syscall_numbers = self._parse_syscall_descriptor(tokens)
				550	if not tokens:
				551	self._parser_state.error('missing colon')
				552	if tokens[0].type != 'COLON':
				553	self._parser_state.error(
				554	'invalid colon', token=tokens[0])
				555	tokens.pop(0)
				556
				557	if not tokens:
				558	self._parser_state.error('missing number')
				559	number = tokens.pop(0)
				560	if number.type != 'NUMERIC_CONSTANT':
				561	self._parser_state.error(
				562	'invalid number', token=number)
				563	number_value = int(number.value, base=0)
				564	if number_value < 0:
				565	self._parser_state.error(
				566	'invalid number', token=number)
				567
				568	for syscall_number in syscall_numbers:
				569	frequency_mapping[syscall_number] += number_value
				570	return frequency_mapping
				571	finally:
				572	self._parser_states.pop()
				573
				574	# frequency-statement = '@frequency' , posix-path
				575	# ;
    def _parse_frequency_statement(self, tokens):
        """Parse an `@frequency <path>` statement and load the named file.

        The path is resolved relative to the directory of the file being
        parsed.

        Returns:
            The mapping produced by _parse_frequency_file() for that file.

        Raises:
            ParseException: if the statement is malformed or the path is not
                an existing file.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        keyword = tokens[0]
        if keyword.type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=keyword)
        tokens.pop(0)

        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)

        base_directory = os.path.dirname(self._parser_state.filename)
        frequency_filename = os.path.normpath(
            os.path.join(base_directory, frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)
				597
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	598	def _parse_policy_file(self, filename):
				599	self._parser_states.append(ParserState(filename))
				600	try:
				601	statements = []
				602	with open(filename) as policy_file:
				603	for line in policy_file:
				604	self._parser_state.set_line(line.rstrip())
				605	tokens = self._parser_state.tokenize()
				606
				607	if not tokens:
				608	# Allow empty lines.
				609	continue
				610
				611	if tokens[0].type == 'INCLUDE':
				612	statements.extend(
				613	self._parse_include_statement(tokens))
Luis Hector Chavez	b440886	2018-12-05 16:54:16 -0800	[diff] [blame^]	614	elif tokens[0].type == 'FREQUENCY':
				615	for syscall_number, frequency in self._parse_frequency_statement(
				616	tokens).items():
				617	self._frequency_mapping[
				618	syscall_number] += frequency
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	619	else:
Luis Hector Chavez	571e958	2018-12-05 09:44:00 -0800	[diff] [blame]	620	statement = self.parse_filter_statement(tokens)
				621	if statement is None:
				622	# If all the syscalls in the statement are for
				623	# another arch, skip the whole statement.
				624	continue
				625	statements.append(statement)
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame]	626
				627	if tokens:
				628	self._parser_state.error(
				629	'extra tokens', token=tokens[0])
				630	return statements
				631	finally:
				632	self._parser_states.pop()
				633
    def parse_file(self, filename):
        """Parse a policy file and return the resulting ParsedPolicy.

        All the statements for the same syscall are merged into a single
        FilterStatement, in order of first appearance. A syscall with no
        entry in the frequency mapping gets a frequency of 1. Every filter
        list is made to end with exactly one unconditional action, appending
        the default action when there is none.

        Args:
            filename: Path of the top-level policy file.

        Returns:
            A ParsedPolicy with the default action and the FilterStatements.

        Raises:
            ParseException: if parsing fails, the recursion limit is hit, or
                a syscall's filter list has anything after its first
                unconditional action.
        """
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError:
            raise ParseException('recursion limit exceeded', filename,
                                 self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list.
        statement_by_syscall = {}
        filter_statements = []
        for syscalls, filters in statements:
            for syscall in syscalls:
                entry = statement_by_syscall.get(syscall)
                if entry is None:
                    entry = FilterStatement(
                        syscall, self._frequency_mapping.get(syscall, 1), [])
                    filter_statements.append(entry)
                    statement_by_syscall[syscall] = entry
                entry.filters.extend(filters)

        for filter_statement in filter_statements:
            filters = filter_statement.filters
            # Number of filters from the first unconditional one to the end.
            first_unconditional = next(
                (index for index, filt in enumerate(filters)
                 if filt.expression is None), len(filters))
            trailing = len(filters) - first_unconditional
            if trailing == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if trailing > 1:
                raise ParseException(('Syscall %s (number %d) already had '
                                      'an unconditional action applied') %
                                     (filter_statement.syscall.name,
                                      filter_statement.syscall.number),
                                     filename, self._parser_states[-1].line)
            assert trailing == 0
            filters.append(
                Filter(expression=None, action=self._default_action))
        return ParsedPolicy(self._default_action, filter_statements)
				669	assert not unconditional_actions_suffix
				670	filter_statement.filters.append(
				671	Filter(expression=None, action=self._default_action))
				672	return ParsedPolicy(self._default_action, filter_statements)