#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A parser for the Minijail policy file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import re

Token = collections.namedtuple('token',
                                ['type', 'value', 'filename', 'line', 'column'])

# A regex that can tokenize a Minijail policy file line.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|in|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+'),
    ('RETURN', r'return'),
    ('ACTION', r'allow|kill-process|kill-thread|kill|trap|trace|log'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
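# For example, the policy-line fragment 'arg0 == 0 || arg0 == 1' matches as
# ARGUMENT('arg0'), OP('=='), NUMERIC_CONSTANT('0'), OR, ARGUMENT('arg0'),
# OP('=='), NUMERIC_CONSTANT('1'), with WHITESPACE matches in between.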


class ParseException(Exception):
    """An exception that is raised when parsing fails."""

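    # The rendered message embeds the offending line and points at the token
    # that caused the failure. For example, an unknown identifier might be
    # reported as (filename and line are illustrative):
    #
    #   example.policy(1:9): invalid constant
    #       arg0 == foo
    #               ^^^
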
    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, line, line_number=1, token=None):
        if token:
            column = token.column
            length = len(token.value)
        else:
            column = len(line)
            length = 1

        message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                       message)
        message += '\n    %s' % line
        message += '\n    %s%s' % (' ' * column, '^' * length)
        super().__init__(message)


class ParserState:
    """Stores the state of the Parser to provide better diagnostics."""

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def set_line(self, line):
        """Update the current line being processed."""
        self._line = line
        self._line_number += 1

    def error(self, message, token=None):
108 """Raise a ParserException with the provided message."""
109 raise ParseException(message, self.filename, self.line,
110 self.line_number, token)

    def tokenize(self):
        """Return a list of tokens for the current line."""
        tokens = []

        last_end = 0
        for token in _TOKEN_RE.finditer(self.line):
            if token.start() != last_end:
                self.error(
                    'invalid token',
                    token=Token('INVALID', self.line[last_end:token.start()],
                                self.filename, self.line_number, last_end))
            last_end = token.end()

            # Omit whitespace and comments now to avoid sprinkling this logic
            # elsewhere.
            if token.lastgroup in ('WHITESPACE', 'COMMENT'):
                continue
            tokens.append(
                Token(token.lastgroup, token.group(), self.filename,
                      self.line_number, token.start()))
        if last_end != len(self.line):
            self.error(
                'invalid token',
                token=Token('INVALID', self.line[last_end:], self.filename,
                            self.line_number, last_end))
        return tokens
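
# Example use of ParserState (illustrative only; the filename and policy line
# below are made up):
#     state = ParserState('example.policy')
#     state.set_line('read: arg0 == 0')
#     tokens = state.tokenize()
#     # token types/values: IDENTIFIER('read'), COLON, ARGUMENT('arg0'),
#     #                     OP('=='), NUMERIC_CONSTANT('0')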


Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression."""


# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format."""

    def __init__(self, arch):
        self._parser_states = [ParserState("<memory>")]
        self._arch = arch

    @property
    def _parser_state(self):
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
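    # A leading '~' complements the constant and truncates the result to the
    # architecture's word size; for example, ~0x1 becomes 0xfffffffe on a
    # 32-bit architecture.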
    def _parse_constant(self, tokens):
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated by the bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
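
        For example, the tokens for the expression '0x20|0x1' parse to the
        value 0x21.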
224 """
225
226 value = 0
227 while tokens:
228 value |= self._parse_constant(tokens)
229 if not tokens or tokens[0].type != 'BITWISE_OR':
230 break
231 tokens.pop(0)
232 else:
233 self._parser_state.error('empty constant')
234 return value

    # atom = argument , op , value
    #      ;
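    # For example, the tokens for 'arg1 == 0x4' yield
    # Atom(argument_index=1, op='==', value=4).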
    def _parse_atom(self, tokens):
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        return Atom(int(argument.value[3:]), operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
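    # For example, the tokens for 'arg0 == 1 && arg1 == 2' yield
    # [Atom(0, '==', 1), Atom(1, '==', 2)].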
    def _parse_clause(self, tokens):
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # filter-expression = clause , [ { '||' , clause } ]
    #                   ;
    def parse_filter_expression(self, tokens):
        """Parse a filter expression in Disjunctive Normal Form.

        Since BPF disallows back jumps, we build the basic blocks in reverse
        order so that all the jump targets are known by the time we need to
        reference them.
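
        For example, the tokens for 'arg0 == 1 && arg1 == 2 || arg0 == 3'
        parse to [[Atom(0, '==', 1), Atom(1, '==', 2)], [Atom(0, '==', 3)]].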
275 """
276
277 clauses = []
278 while tokens:
279 clauses.append(self._parse_clause(tokens))
280 if not tokens or tokens[0].type != 'OR':
281 break
282 tokens.pop(0)
283 else:
284 self._parser_state.error('empty filter expression')
285 return clauses