#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A parser for the Minijail policy file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import re

Token = collections.namedtuple('token',
                                ['type', 'value', 'filename', 'line', 'column'])

# A regex that can tokenize a Minijail policy file line.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|in|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+'),
    ('RETURN', r'return'),
    ('ACTION', r'allow|kill-process|kill-thread|kill|trap|trace|log'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
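# For example, the policy-line fragment 'arg0 == 0 || arg0 == 1' matches as
# ARGUMENT('arg0'), OP('=='), NUMERIC_CONSTANT('0'), OR, ARGUMENT('arg0'),
# OP('=='), NUMERIC_CONSTANT('1'), with WHITESPACE matches in between.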


class ParseException(Exception):
    """An exception that is raised when parsing fails."""

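    # The rendered message embeds the offending line and points at the token
    # that caused the failure. For example, an unknown identifier might be
    # reported as (filename and line are illustrative):
    #
    #   example.policy(1:9): invalid constant
    #       arg0 == foo
    #               ^^^
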
    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, line, line_number=1, token=None):
        if token:
            column = token.column
            length = len(token.value)
        else:
            column = len(line)
            length = 1

        message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                       message)
        message += '\n    %s' % line
        message += '\n    %s%s' % (' ' * column, '^' * length)
        super().__init__(message)


class ParserState:
    """Stores the state of the Parser to provide better diagnostics."""

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def set_line(self, line):
        """Update the current line being processed."""
        self._line = line
        self._line_number += 1

    def error(self, message, token=None):
108 """Raise a ParserException with the provided message."""
109 raise ParseException(message, self.filename, self.line,
110 self.line_number, token)

    def tokenize(self):
        """Return a list of tokens for the current line."""
        tokens = []

        last_end = 0
        for token in _TOKEN_RE.finditer(self.line):
            if token.start() != last_end:
                self.error(
                    'invalid token',
                    token=Token('INVALID', self.line[last_end:token.start()],
                                self.filename, self.line_number, last_end))
            last_end = token.end()

            # Omit whitespace and comments now to avoid sprinkling this logic
            # elsewhere.
            if token.lastgroup in ('WHITESPACE', 'COMMENT'):
                continue
            tokens.append(
                Token(token.lastgroup, token.group(), self.filename,
                      self.line_number, token.start()))
        if last_end != len(self.line):
            self.error(
                'invalid token',
                token=Token('INVALID', self.line[last_end:], self.filename,
                            self.line_number, last_end))
        return tokens
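
# Example use of ParserState (illustrative only; the filename and policy line
# below are made up):
#     state = ParserState('example.policy')
#     state.set_line('read: arg0 == 0')
#     tokens = state.tokenize()
#     # token types/values: IDENTIFIER('read'), COLON, ARGUMENT('arg0'),
#     #                     OP('=='), NUMERIC_CONSTANT('0')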


Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression."""


# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format."""

    def __init__(self, arch):
        self._parser_states = [ParserState("<memory>")]
        self._arch = arch

    @property
    def _parser_state(self):
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
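    # A leading '~' complements the constant and truncates the result to the
    # architecture's word size; for example, ~0x1 becomes 0xfffffffe on a
    # 32-bit architecture.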
    def _parse_constant(self, tokens):
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated by the bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
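
        For example, the tokens for the expression '0x20|0x1' parse to the
        value 0x21.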
224 """
225
226 value = 0
227 while tokens:
228 value |= self._parse_constant(tokens)
229 if not tokens or tokens[0].type != 'BITWISE_OR':
230 break
231 tokens.pop(0)
232 else:
233 self._parser_state.error('empty constant')
234 return value

    # atom = argument , op , value
    #      ;
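    # For example, the tokens for 'arg1 == 0x4' yield
    # Atom(argument_index=1, op='==', value=4).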
    def _parse_atom(self, tokens):
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        return Atom(int(argument.value[3:]), operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
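    # For example, the tokens for 'arg0 == 1 && arg1 == 2' yield
    # [Atom(0, '==', 1), Atom(1, '==', 2)].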
    def _parse_clause(self, tokens):
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # filter-expression = clause , [ { '||' , clause } ]
    #                   ;
    def parse_filter_expression(self, tokens):
        """Parse a filter expression in Disjunctive Normal Form.

        Since BPF disallows back jumps, we build the basic blocks in reverse
        order so that all the jump targets are known by the time we need to
        reference them.
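
        For example, the tokens for 'arg0 == 1 && arg1 == 2 || arg0 == 3'
        parse to [[Atom(0, '==', 1), Atom(1, '==', 2)], [Atom(0, '==', 3)]].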
275 """
276
277 clauses = []
278 while tokens:
279 clauses.append(self._parse_clause(tokens))
280 if not tokens or tokens[0].type != 'OR':
281 break
282 tokens.pop(0)
283 else:
284 self._parser_state.error('empty filter expression')
285 return clauses