Blame - tools/parser.py - android.googlesource.com/platform/external/minijail

blob: a2cf67a0507c5531516faa7c0446fe2630370864 [file] [log] [blame]

Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	1	#!/usr/bin/env python3
				2	# -- coding: utf-8 --
				3	#
				4	# Copyright (C) 2018 The Android Open Source Project
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License");
				7	# you may not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS,
				14	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
				17	"""A parser for the Minijail policy file."""
				18
				19	from __future__ import absolute_import
				20	from __future__ import division
				21	from __future__ import print_function
				22
				23	import collections
				24	import re
				25
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	26	import bpf
				27
# A single lexical token: its kind (one of the names in _TOKEN_SPECIFICATION,
# or 'INVALID' for diagnostics), the matched text, and where it was found
# (file name, 1-based line number, 0-based column).
Token = collections.namedtuple('token',
                               ['type', 'value', 'filename', 'line', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# Each entry is (token kind, regex fragment). The fragments are combined into
# one alternation of named groups, and the regex engine picks the first
# alternative that matches at a given position, so ORDER MATTERS:
#   - 'OR' (||) must precede 'BITWISE_OR' (|),
#   - 'AND' (&&) must precede 'OP' (which contains a lone &),
#   - keyword-like kinds must precede the catch-all 'IDENTIFIER'.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    # `in` is anchored on word boundaries so that identifiers that merely
    # start with "in" (e.g. inotify_init) are not split into OP + IDENTIFIER.
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+'),
    ('RETURN', r'return'),
    ('ACTION', r'allow|kill-process|kill-thread|kill|trap|trace|log'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
# Every fragment is wrapped in its own named group, so `match.lastgroup`
# reports the token kind and the `|` inside a fragment stays scoped to it.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
				60
				61
				62	class ParseException(Exception):
				63	"""An exception that is raised when parsing fails."""
				64
				65	# pylint: disable=too-many-arguments
				66	def __init__(self, message, filename, line, line_number=1, token=None):
				67	if token:
				68	column = token.column
				69	length = len(token.value)
				70	else:
				71	column = len(line)
				72	length = 1
				73
				74	message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
				75	message)
				76	message += '\n %s' % line
				77	message += '\n %s%s' % (' ' * column, '^' * length)
				78	super().__init__(message)
				79
				80
				81	class ParserState:
				82	"""Stores the state of the Parser to provide better diagnostics."""
				83
				84	def __init__(self, filename):
				85	self._filename = filename
				86	self._line = ''
				87	self._line_number = 0
				88
				89	@property
				90	def filename(self):
				91	"""Return the name of the file being processed."""
				92	return self._filename
				93
				94	@property
				95	def line(self):
				96	"""Return the current line being processed."""
				97	return self._line
				98
				99	@property
				100	def line_number(self):
				101	"""Return the current line number being processed."""
				102	return self._line_number
				103
				104	def set_line(self, line):
				105	"""Update the current line being processed."""
				106	self._line = line
				107	self._line_number += 1
				108
				109	def error(self, message, token=None):
				110	"""Raise a ParserException with the provided message."""
				111	raise ParseException(message, self.filename, self.line,
				112	self.line_number, token)
				113
				114	def tokenize(self):
				115	"""Return a list of tokens for the current line."""
				116	tokens = []
				117
				118	last_end = 0
				119	for token in _TOKEN_RE.finditer(self.line):
				120	if token.start() != last_end:
				121	self.error(
				122	'invalid token',
				123	token=Token('INVALID', self.line[last_end:token.start()],
				124	self.filename, self.line_number, last_end))
				125	last_end = token.end()
				126
				127	# Omit whitespace and comments now to avoid sprinkling this logic
				128	# elsewhere.
				129	if token.lastgroup in ('WHITESPACE', 'COMMENT'):
				130	continue
				131	tokens.append(
				132	Token(token.lastgroup, token.group(), self.filename,
				133	self.line_number, token.start()))
				134	if last_end != len(self.line):
				135	self.error(
				136	'invalid token',
				137	token=Token('INVALID', self.line[last_end:], self.filename,
				138	self.line_number, last_end))
				139	return tokens
				140
				141
# A single boolean comparison within a filter expression: the index of the
# syscall argument being tested, the comparison operator (as source text),
# and the constant value it is compared against.
Atom = collections.namedtuple('Atom', 'argument_index op value')

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists, one for disjunctions and the inner one for conjunctions.
# The elements of the inner list are Atoms.
Filter = collections.namedtuple('Filter', 'expression action')
				152
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	153
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	154	# pylint: disable=too-few-public-methods
				155	class PolicyParser:
				156	"""A parser for the Minijail seccomp policy file format."""
				157
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	158	def __init__(self, arch, *, kill_action):
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	159	self._parser_states = [ParserState("<memory>")]
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	160	self._kill_action = kill_action
Luis Hector Chavez	d4ce449	2018-12-04 20:00:32 -0800	[diff] [blame]	161	self._arch = arch
				162
				163	@property
				164	def _parser_state(self):
				165	return self._parser_states[-1]
				166
				167	# single-constant = identifier
				168	# \| numeric-constant
				169	# ;
				170	def _parse_single_constant(self, token):
				171	if token.type == 'IDENTIFIER':
				172	if token.value not in self._arch.constants:
				173	self._parser_state.error('invalid constant', token=token)
				174	single_constant = self._arch.constants[token.value]
				175	elif token.type == 'NUMERIC_CONSTANT':
				176	try:
				177	single_constant = int(token.value, base=0)
				178	except ValueError:
				179	self._parser_state.error('invalid constant', token=token)
				180	else:
				181	self._parser_state.error('invalid constant', token=token)
				182	if single_constant > self._arch.max_unsigned:
				183	self._parser_state.error('unsigned overflow', token=token)
				184	elif single_constant < self._arch.min_signed:
				185	self._parser_state.error('signed underflow', token=token)
				186	elif single_constant < 0:
				187	# This converts the constant to an unsigned representation of the
				188	# same value, since BPF only uses unsigned values.
				189	single_constant = self._arch.truncate_word(single_constant)
				190	return single_constant
				191
				192	# constant = [ '~' ] , '(' , value , ')'
				193	# \| [ '~' ] , single-constant
				194	# ;
				195	def _parse_constant(self, tokens):
				196	negate = False
				197	if tokens[0].type == 'BITWISE_COMPLEMENT':
				198	negate = True
				199	tokens.pop(0)
				200	if not tokens:
				201	self._parser_state.error('empty complement')
				202	if tokens[0].type == 'BITWISE_COMPLEMENT':
				203	self._parser_state.error(
				204	'invalid double complement', token=tokens[0])
				205	if tokens[0].type == 'LPAREN':
				206	last_open_paren = tokens.pop(0)
				207	single_value = self.parse_value(tokens)
				208	if not tokens or tokens[0].type != 'RPAREN':
				209	self._parser_state.error(
				210	'unclosed parenthesis', token=last_open_paren)
				211	else:
				212	single_value = self._parse_single_constant(tokens[0])
				213	tokens.pop(0)
				214	if negate:
				215	single_value = self._arch.truncate_word(~single_value)
				216	return single_value
				217
				218	# value = constant , [ { '\|' , constant } ]
				219	# ;
				220	def parse_value(self, tokens):
				221	"""Parse constants separated bitwise OR operator \|.
				222
				223	Constants can be:
				224
				225	- A number that can be parsed with int(..., base=0)
				226	- A named constant expression.
				227	- A parenthesized, valid constant expression.
				228	- A valid constant expression prefixed with the unary bitwise
				229	complement operator ~.
				230	- A series of valid constant expressions separated by bitwise
				231	OR operator \|.
				232
				233	If there is an error parsing any of the constants, the whole process
				234	fails.
				235	"""
				236
				237	value = 0
				238	while tokens:
				239	value \|= self._parse_constant(tokens)
				240	if not tokens or tokens[0].type != 'BITWISE_OR':
				241	break
				242	tokens.pop(0)
				243	else:
				244	self._parser_state.error('empty constant')
				245	return value
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	246
				247	# atom = argument , op , value
				248	# ;
				249	def _parse_atom(self, tokens):
				250	if not tokens:
				251	self._parser_state.error('missing argument')
				252	argument = tokens.pop(0)
				253	if argument.type != 'ARGUMENT':
				254	self._parser_state.error('invalid argument', token=argument)
				255
				256	if not tokens:
				257	self._parser_state.error('missing operator')
				258	operator = tokens.pop(0)
				259	if operator.type != 'OP':
				260	self._parser_state.error('invalid operator', token=operator)
				261
				262	value = self.parse_value(tokens)
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	263	argument_index = int(argument.value[3:])
				264	if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
				265	self._parser_state.error('invalid argument', token=argument)
				266	return Atom(argument_index, operator.value, value)
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	267
				268	# clause = atom , [ { '&&' , atom } ]
				269	# ;
				270	def _parse_clause(self, tokens):
				271	atoms = []
				272	while tokens:
				273	atoms.append(self._parse_atom(tokens))
				274	if not tokens or tokens[0].type != 'AND':
				275	break
				276	tokens.pop(0)
				277	else:
				278	self._parser_state.error('empty clause')
				279	return atoms
				280
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	281	# argument-expression = clause , [ { '\|\|' , clause } ]
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	282	# ;
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	283	def parse_argument_expression(self, tokens):
				284	"""Parse a argument expression in Disjunctive Normal Form.
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	285
				286	Since BPF disallows back jumps, we build the basic blocks in reverse
				287	order so that all the jump targets are known by the time we need to
				288	reference them.
				289	"""
				290
				291	clauses = []
				292	while tokens:
				293	clauses.append(self._parse_clause(tokens))
				294	if not tokens or tokens[0].type != 'OR':
				295	break
				296	tokens.pop(0)
				297	else:
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	298	self._parser_state.error('empty argument expression')
Luis Hector Chavez	0516e18	2018-12-04 20:36:00 -0800	[diff] [blame]	299	return clauses
Luis Hector Chavez	1ac9eca	2018-12-04 21:28:52 -0800	[diff] [blame^]	300
				301	# action = 'allow' \| '1'
				302	# \| 'kill-process'
				303	# \| 'kill-thread'
				304	# \| 'kill'
				305	# \| 'trap'
				306	# \| 'trace'
				307	# \| 'log'
				308	# \| 'return' , single-constant
				309	# ;
				310	def _parse_action(self, tokens):
				311	if not tokens:
				312	self._parser_state.error('missing action')
				313	action_token = tokens.pop(0)
				314	if action_token.type == 'ACTION':
				315	if action_token.value == 'allow':
				316	return bpf.Allow()
				317	if action_token.value == 'kill':
				318	return self._kill_action
				319	if action_token.value == 'kill-process':
				320	return bpf.KillProcess()
				321	if action_token.value == 'kill-thread':
				322	return bpf.KillThread()
				323	if action_token.value == 'trap':
				324	return bpf.Trap()
				325	if action_token.value == 'trace':
				326	return bpf.Trace()
				327	if action_token.value == 'log':
				328	return bpf.Log()
				329	elif action_token.type == 'NUMERIC_CONSTANT':
				330	constant = self._parse_single_constant(action_token)
				331	if constant == 1:
				332	return bpf.Allow()
				333	elif action_token.type == 'RETURN':
				334	if not tokens:
				335	self._parser_state.error('missing return value')
				336	return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
				337	return self._parser_state.error('invalid action', token=action_token)
				338
				339	# single-filter = action
				340	# \| argument-expression , [ ';' , action ]
				341	# ;
				342	def _parse_single_filter(self, tokens):
				343	if not tokens:
				344	self._parser_state.error('missing filter')
				345	if tokens[0].type == 'ARGUMENT':
				346	# Only argument expressions can start with an ARGUMENT token.
				347	argument_expression = self.parse_argument_expression(tokens)
				348	if tokens and tokens[0].type == 'SEMICOLON':
				349	tokens.pop(0)
				350	action = self._parse_action(tokens)
				351	else:
				352	action = bpf.Allow()
				353	return Filter(argument_expression, action)
				354	else:
				355	return Filter(None, self._parse_action(tokens))
				356
				357	# filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
				358	# \| single-filter
				359	# ;
				360	def parse_filter(self, tokens):
				361	"""Parse a filter and return a list of Filter objects."""
				362	if not tokens:
				363	self._parser_state.error('missing filter')
				364	filters = []
				365	if tokens[0].type == 'LBRACE':
				366	opening_brace = tokens.pop(0)
				367	while tokens:
				368	filters.append(self._parse_single_filter(tokens))
				369	if not tokens or tokens[0].type != 'COMMA':
				370	break
				371	tokens.pop(0)
				372	if not tokens or tokens[0].type != 'RBRACE':
				373	self._parser_state.error('unclosed brace', token=opening_brace)
				374	tokens.pop(0)
				375	else:
				376	filters.append(self._parse_single_filter(tokens))
				377	return filters