Blame - tools/parser.py - android.googlesource.com/platform/external/minijail

blob: 99b2e997cc185dcc70e580f3dd5c6e2ad43416f0 [file] [log] [blame]

Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	1	#!/usr/bin/env python3
				2	# -- coding: utf-8 --
				3	#
				4	# Copyright (C) 2018 The Android Open Source Project
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License");
				7	# you may not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS,
				14	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
				17	"""A parser for the Minijail policy file."""
				18
				19	from __future__ import absolute_import
				20	from __future__ import division
				21	from __future__ import print_function
				22
				23	import collections
Luis Hector Chavez	5dfe919	2018-12-04 22:38:43 -0800	[diff] [blame^]	24	import itertools
				25	import os.path
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	26	import re
				27
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame]	28	import bpf
				29
# A lexical token produced by ParserState.tokenize().
#
# Fields:
#   type:     name of the _TOKEN_SPECIFICATION entry that matched (or
#             'INVALID' for text that no entry matched).
#   value:    the matched text.
#   filename: name of the file the token came from.
#   line:     filled with the current line number by the tokenizer.
#   column:   0-based offset of the token within its line.
#
# The typename matches the binding so that repr() reads 'Token(...)'.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'column'])
				32
				33	# A regex that can tokenize a Minijail policy file line.
				34	_TOKEN_SPECIFICATION = (
				35	('COMMENT', r'#.*$'),
				36	('WHITESPACE', r'\s+'),
				37	('INCLUDE', r'@include'),
				38	('PATH', r'(?:\.)?/\S+'),
				39	('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+\|-?0[Oo][0-7]+\|-?[0-9]+'),
				40	('COLON', r':'),
				41	('SEMICOLON', r';'),
				42	('COMMA', r','),
				43	('BITWISE_COMPLEMENT', r'~'),
				44	('LPAREN', r'\('),
				45	('RPAREN', r'\)'),
				46	('LBRACE', r'\{'),
				47	('RBRACE', r'\}'),
				48	('RBRACKET', r'\]'),
				49	('LBRACKET', r'\['),
				50	('OR', r'\\|\\|'),
				51	('AND', r'&&'),
				52	('BITWISE_OR', r'\\|'),
				53	('OP', r'&\|in\|==\|!=\|<=\|<\|>=\|>'),
				54	('EQUAL', r'='),
				55	('ARGUMENT', r'arg[0-9]+'),
				56	('RETURN', r'return'),
				57	('ACTION', r'allow\|kill-process\|kill-thread\|kill\|trap\|trace\|log'),
				58	('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
				59	)
				60	_TOKEN_RE = re.compile('\|'.join(
				61	r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
				62
				63
				64	class ParseException(Exception):
				65	"""An exception that is raised when parsing fails."""
				66
				67	# pylint: disable=too-many-arguments
				68	def __init__(self, message, filename, line, line_number=1, token=None):
				69	if token:
				70	column = token.column
				71	length = len(token.value)
				72	else:
				73	column = len(line)
				74	length = 1
				75
				76	message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
				77	message)
				78	message += '\n %s' % line
				79	message += '\n %s%s' % (' ' * column, '^' * length)
				80	super().__init__(message)
				81
				82
				83	class ParserState:
				84	"""Stores the state of the Parser to provide better diagnostics."""
				85
				86	def __init__(self, filename):
				87	self._filename = filename
				88	self._line = ''
				89	self._line_number = 0
				90
				91	@property
				92	def filename(self):
				93	"""Return the name of the file being processed."""
				94	return self._filename
				95
				96	@property
				97	def line(self):
				98	"""Return the current line being processed."""
				99	return self._line
				100
				101	@property
				102	def line_number(self):
				103	"""Return the current line number being processed."""
				104	return self._line_number
				105
				106	def set_line(self, line):
				107	"""Update the current line being processed."""
				108	self._line = line
				109	self._line_number += 1
				110
				111	def error(self, message, token=None):
				112	"""Raise a ParserException with the provided message."""
				113	raise ParseException(message, self.filename, self.line,
				114	self.line_number, token)
				115
				116	def tokenize(self):
				117	"""Return a list of tokens for the current line."""
				118	tokens = []
				119
				120	last_end = 0
				121	for token in _TOKEN_RE.finditer(self.line):
				122	if token.start() != last_end:
				123	self.error(
				124	'invalid token',
				125	token=Token('INVALID', self.line[last_end:token.start()],
				126	self.filename, self.line_number, last_end))
				127	last_end = token.end()
				128
				129	# Omit whitespace and comments now to avoid sprinkling this logic
				130	# elsewhere.
				131	if token.lastgroup in ('WHITESPACE', 'COMMENT'):
				132	continue
				133	tokens.append(
				134	Token(token.lastgroup, token.group(), self.filename,
				135	self.line_number, token.start()))
				136	if last_end != len(self.line):
				137	self.error(
				138	'invalid token',
				139	token=Token('INVALID', self.line[last_end:], self.filename,
				140	self.line_number, last_end))
				141	return tokens
				142
				143
# A single boolean comparison within a filter expression.
Atom = collections.namedtuple('Atom', 'argument_index op value')

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists, one for disjunctions and the inner one for conjunctions.
# The elements of the inner list are Atoms.
Filter = collections.namedtuple('Filter', 'expression action')

# A system call.
Syscall = collections.namedtuple('Syscall', 'name number')

# The result of parsing a filter statement.
#
# Statements have a list of syscalls, and an associated list of filters that
# will be evaluated sequentially when any of the syscalls is invoked.
ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', 'syscalls filters')

# The filter list for a particular syscall.
#
# This is a mapping from one syscall to a list of filters that are evaluated
# sequentially. The last filter is always an unconditional action.
FilterStatement = collections.namedtuple(
    'FilterStatement', 'syscall frequency filters')

# The result of parsing a minijail .policy file.
ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', 'default_action filter_statements')
				177
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	178
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    The grammar rule handled by each method is given in the comment above
    it. Token lists are consumed destructively: every parse method pops the
    tokens it recognizes from the front of the list it is handed, so the
    caller can inspect what is left over.

    All syntax errors are reported through the current ParserState, which
    raises ParseException.

    Args:
        arch: architecture description. This class reads its `constants`
            and `syscalls` mappings and its `max_unsigned` / `min_signed`
            bounds, and calls its `truncate_word()` method.
        kill_action: the action produced by a bare `kill`; it is also used
            as the policy's default action.
        include_depth_limit: an `@include` is rejected once the stack of
            parser states (the initial in-memory state plus one per file
            currently being parsed) has reached this size.
    """

    def __init__(self, arch, *, kill_action, include_depth_limit=10):
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._arch = arch

    @property
    def _parser_state(self):
        """The state of the innermost file currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Convert one constant token into a non-negative integer.

        IDENTIFIER tokens are looked up in the architecture's constants;
        NUMERIC_CONSTANT tokens are parsed with int(..., base=0). Negative
        values within range are converted with the architecture's
        truncate_word().
        """
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                # E.g. '010': matched by the tokenizer, rejected by base=0.
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one optionally-complemented constant or parenthesized value.

        `tokens` must not be empty on entry; parse_value() guarantees that.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the constant itself.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached only when the loop ran out of tokens: either there
            # were none to begin with or the input ended right after a '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse one `argN op value` comparison and return it as an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # Strip the leading 'arg' to get the index.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by '&&' and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns a list of clauses (joined by '||' in the source), each of
        which is a list of Atoms (joined by '&&' in the source).
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def _parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object."""
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                # A bare 'kill' means whatever the caller configured.
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            # The only numeric action is '1', a synonym for 'allow'.
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter and return it as a Filter.

        An argument expression without an explicit action means 'allow'. A
        bare action yields a Filter whose expression is None.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self._parse_action(tokens)
            else:
                action = bpf.Allow()
            return Filter(argument_expression, action)
        return Filter(None, self._parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | libc-function , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse one syscall name and return a 1-tuple holding its Syscall.

        A tuple is returned so that callers can extend() their list with it.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        if syscall_descriptor.type != 'IDENTIFIER':
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        # TODO(lhchavez): Support libc function names.
        # TODO(lhchavez): Support metadata.
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None if the statement yielded no syscall descriptors.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an @include line and return the included file's statements.

        Relative paths are resolved against the directory of the file that
        contains the @include.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        # '>=' rather than '==': the stack already holds at least two states
        # here, so an exact comparison would never fire for limits below 2.
        if len(self._parser_states) >= self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_policy_file(self, filename):
        """Parse `filename` and return its list of ParsedFilterStatements.

        Statements from @include'd files are spliced in at the position of
        the @include. A ParserState for the file is pushed for the duration
        of the call and popped again even if parsing fails.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            # Decode explicitly so the result does not depend on the locale.
            with open(filename, encoding='utf-8') as policy_file:
                for line in policy_file:
                    self._parser_state.set_line(line.rstrip())
                    tokens = self._parser_state.tokenize()

                    if not tokens:
                        # Allow empty lines.
                        continue

                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    else:
                        statement = self.parse_filter_statement(tokens)
                        # parse_filter_statement() may return None; such a
                        # statement contributes nothing and must not reach
                        # parse_file(), which unpacks every entry.
                        if statement is not None:
                            statements.append(statement)

                    # The statement parsers pop what they consume; anything
                    # left over is trailing garbage.
                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a file and return the resulting ParsedPolicy.

        All statements (including those from @include'd files) are collapsed
        into one FilterStatement per syscall, in order of first appearance.
        Every filter list is guaranteed to end with exactly one unconditional
        action: the default action is appended when the policy did not
        supply one.

        Raises:
            ParseException: on any syntax error, if the interpreter's
                recursion limit is hit, or if a syscall's filter list has
                anything after its first unconditional action.
        """
        try:
            statements = self._parse_policy_file(filename)
        except RecursionError as error:
            raise ParseException('recursion limit exceeded', filename,
                                 self._parser_states[-1].line) from error

        # Collapse statements into a single syscall-to-filter-list.
        syscall_filter_mapping = {}
        filter_statements = []
        for syscalls, filters in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(FilterStatement(syscall, 1, []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                syscall_filter_mapping[syscall].filters.extend(filters)
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                raise ParseException(('Syscall %s (number %d) already had '
                                      'an unconditional action applied') %
                                     (filter_statement.syscall.name,
                                      filter_statement.syscall.number),
                                     filename, self._parser_states[-1].line)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=self._default_action))
        return ParsedPolicy(self._default_action, filter_statements)