blob: a2cf67a0507c5531516faa7c0446fe2630370864 [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
24import re
25
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080026import bpf
27
# A lexical token: its kind (one of the group names in _TOKEN_SPECIFICATION,
# or 'INVALID'), the matched text, and where it was found.
Token = collections.namedtuple('token',
                               ['type', 'value', 'filename', 'line', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# Each entry is (token type, pattern). The patterns are joined into a single
# alternation of named groups, so ORDER MATTERS: the first alternative that
# matches at a position wins. Keyword-like tokens (INCLUDE, OP's `in`,
# ARGUMENT, RETURN, ACTION) are listed before IDENTIFIER, so they are anchored
# with \b word boundaries; without the anchors an identifier that merely
# starts with a keyword (e.g. `inotify_init`, `return_value`, `logfile`) would
# be split into a keyword token followed by a bogus identifier.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    # '||' and '&&' must be tried before the single-character '|' and '&'.
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    # The longer 'kill-*' spellings must precede plain 'kill'.
    ('ACTION',
     r'\b(?:allow|kill-process|kill-thread|kill|trap|trace|log)\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
60
61
class ParseException(Exception):
    """Raised when a policy line cannot be parsed.

    The exception text consists of three lines: a
    `filename(line:column): message` header (the column is 1-based), the
    offending source line, and a row of carets marking the offending token.
    When no token is supplied, a single caret is placed just past the end of
    the line.
    """

    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, line, line_number=1, token=None):
        column = token.column if token else len(line)
        length = len(token.value) if token else 1

        header = '%s(%d:%d): %s' % (filename, line_number, column + 1,
                                    message)
        source = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source + marker)
79
80
class ParserState:
    """Tracks the file, line text and line number currently being parsed.

    This is what lets errors point at the exact location of a problem.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line_number = 0
        self._line = ''

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the text of the line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the number of the line being processed."""
        return self._line_number

    def set_line(self, line):
        """Make `line` the current line and advance the line counter by one."""
        self._line_number += 1
        self._line = line

    def error(self, message, token=None):
        """Raise a ParseException for the current file and line.

        `token`, when given, is forwarded to the exception so that it can
        point at the offending part of the line.
        """
        raise ParseException(message, self.filename, self.line,
                             line_number=self.line_number, token=token)

    def _invalid_token(self, begin, end=None):
        """Build an INVALID token covering self.line[begin:end]."""
        return Token('INVALID', self.line[begin:end], self.filename,
                     self.line_number, begin)

    def tokenize(self):
        """Return the list of tokens found on the current line.

        Whitespace and comments are dropped. Any stretch of the line that no
        token pattern matches is reported through error() as an
        'invalid token'.
        """
        found = []
        cursor = 0
        for match in _TOKEN_RE.finditer(self.line):
            begin = match.start()
            if begin != cursor:
                # The regex skipped over text it could not match.
                self.error('invalid token',
                           token=self._invalid_token(cursor, begin))
            cursor = match.end()

            # Filter whitespace and comments here so that no consumer of the
            # token list has to.
            kind = match.lastgroup
            if kind not in ('WHITESPACE', 'COMMENT'):
                found.append(
                    Token(kind, match.group(), self.filename,
                          self.line_number, begin))

        if cursor != len(self.line):
            # Unmatched text at the very end of the line.
            self.error('invalid token', token=self._invalid_token(cursor))
        return found
140
141
# A single boolean comparison within a filter expression.
Atom = collections.namedtuple('Atom', 'argument_index op value')

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists, one for disjunctions and the inner one for conjunctions.
# The elements of the inner list are Atoms.
Filter = collections.namedtuple('Filter', 'expression action')
152
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800153
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800154# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    Every parsing method takes a list of tokens and consumes, from the front
    of that list, the tokens that make up the construct it parses. Problems
    are reported through the current ParserState's error() method.
    """

    # Maps the spelling of an action to the name of the bpf class that
    # implements it. 'kill' is deliberately absent: it resolves to the
    # kill_action given to the constructor.
    _ACTION_CLASS_NAMES = {
        'allow': 'Allow',
        'kill-process': 'KillProcess',
        'kill-thread': 'KillThread',
        'trap': 'Trap',
        'trace': 'Trace',
        'log': 'Log',
    }

    def __init__(self, arch, *, kill_action):
        self._arch = arch
        self._kill_action = kill_action
        self._parser_states = [ParserState("<memory>")]

    @property
    def _parser_state(self):
        # The innermost (most recently pushed) state is the active one.
        return self._parser_states[-1]

    def _parse_sequence(self, tokens, parse_item, separator, empty_message):
        """Parse `item { separator item }` and return the list of items.

        `parse_item` is called with `tokens` once per item. If there is
        nothing left to parse where an item is required (at the start, or
        right after a separator), `empty_message` is reported as an error.
        """
        items = []
        if tokens:
            while True:
                items.append(parse_item(tokens))
                if not (tokens and tokens[0].type == separator):
                    return items
                tokens.pop(0)
                if not tokens:
                    break
        self._parser_state.error(empty_message)
        return items

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the unsigned integer value denoted by a single token.

        Identifiers are looked up in the architecture's constant table and
        numeric literals are converted with int(..., base=0). The value must
        lie between the architecture's min_signed and max_unsigned.
        """
        state = self._parser_state
        arch = self._arch
        kind = token.type
        if kind == 'NUMERIC_CONSTANT':
            try:
                number = int(token.value, base=0)
            except ValueError:
                state.error('invalid constant', token=token)
        elif kind == 'IDENTIFIER':
            known_constants = arch.constants
            if token.value not in known_constants:
                state.error('invalid constant', token=token)
            number = known_constants[token.value]
        else:
            state.error('invalid constant', token=token)

        if number > arch.max_unsigned:
            state.error('unsigned overflow', token=token)
        if number < arch.min_signed:
            state.error('signed underflow', token=token)
        if number < 0:
            # BPF only uses unsigned values, so negative constants are
            # converted to the unsigned representation of the same value.
            number = arch.truncate_word(number)
        return number

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one optionally complemented constant or parenthesized value.

        Expects `tokens` to be non-empty.
        """
        state = self._parser_state
        complemented = tokens[0].type == 'BITWISE_COMPLEMENT'
        if complemented:
            tokens.pop(0)
            if not tokens:
                state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                state.error('invalid double complement', token=tokens[0])

        if tokens[0].type != 'LPAREN':
            result = self._parse_single_constant(tokens[0])
        else:
            opening_paren = tokens.pop(0)
            result = self.parse_value(tokens)
            if not (tokens and tokens[0].type == 'RPAREN'):
                state.error('unclosed parenthesis', token=opening_paren)
        # Drop the single-constant token or the closing parenthesis.
        tokens.pop(0)

        if complemented:
            return self._arch.truncate_word(~result)
        return result

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse one or more constants joined by the bitwise OR operator |.

        A constant is any of:

        - A number that int(..., base=0) can parse.
        - A named constant.
        - A valid constant expression enclosed in parentheses.
        - A valid constant expression preceded by the unary bitwise
          complement operator ~.

        Returns the bitwise OR of all the constants. An error in any of the
        constants makes the whole parse fail.
        """
        combined = 0
        for constant in self._parse_sequence(
                tokens, self._parse_constant, 'BITWISE_OR', 'empty constant'):
            combined |= constant
        return combined

    def _pop_expected(self, tokens, kind, missing_message, invalid_message):
        """Remove and return the first token, which must be of type `kind`."""
        if not tokens:
            self._parser_state.error(missing_message)
        token = tokens.pop(0)
        if token.type != kind:
            self._parser_state.error(invalid_message, token=token)
        return token

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN op value` comparison into an Atom."""
        argument = self._pop_expected(tokens, 'ARGUMENT', 'missing argument',
                                      'invalid argument')
        operator = self._pop_expected(tokens, 'OP', 'missing operator',
                                      'invalid operator')
        value = self.parse_value(tokens)

        # ARGUMENT tokens are spelled 'arg<N>'; skip the 'arg' prefix.
        index = int(argument.value[len('arg'):])
        if index < 0 or index >= bpf.MAX_SYSCALL_ARGUMENTS:
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index=index, op=operator.value, value=value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by && and return them as a list."""
        return self._parse_sequence(tokens, self._parse_atom, 'AND',
                                    'empty clause')

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns a list with one entry per ||-separated clause; each entry is
        itself the list of Atoms that are &&-ed together in that clause.
        """
        return self._parse_sequence(tokens, self._parse_clause, 'OR',
                                    'empty argument expression')

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def _parse_action(self, tokens):
        """Parse an action and return the object that represents it."""
        if not tokens:
            self._parser_state.error('missing action')
        head = tokens.pop(0)
        kind = head.type

        if kind == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            errno_value = self._parse_single_constant(tokens.pop(0))
            return bpf.ReturnErrno(errno_value)

        if kind == 'NUMERIC_CONSTANT':
            # The literal 1 is an alternative spelling of 'allow'.
            if self._parse_single_constant(head) == 1:
                return bpf.Allow()
        elif kind == 'ACTION':
            if head.value == 'kill':
                return self._kill_action
            class_name = self._ACTION_CLASS_NAMES.get(head.value)
            if class_name is not None:
                return getattr(bpf, class_name)()

        return self._parser_state.error('invalid action', token=head)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter: a bare action, or an expression with an action.

        An argument expression without an explicit `; action` suffix gets the
        allow action.
        """
        if not tokens:
            self._parser_state.error('missing filter')

        # Only argument expressions can start with an ARGUMENT token, so
        # anything else has to be a bare action.
        if tokens[0].type != 'ARGUMENT':
            return Filter(None, self._parse_action(tokens))

        expression = self.parse_argument_expression(tokens)
        if tokens and tokens[0].type == 'SEMICOLON':
            tokens.pop(0)
            action = self._parse_action(tokens)
        else:
            action = bpf.Allow()
        return Filter(expression, action)

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects.

        The filter is either a single filter or a brace-enclosed,
        comma-separated list of single filters.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type != 'LBRACE':
            return [self._parse_single_filter(tokens)]

        opening_brace = tokens.pop(0)
        filters = []
        pending = bool(tokens)
        while pending:
            filters.append(self._parse_single_filter(tokens))
            pending = bool(tokens) and tokens[0].type == 'COMMA'
            if pending:
                tokens.pop(0)
                pending = bool(tokens)
        if not (tokens and tokens[0].type == 'RBRACE'):
            self._parser_state.error('unclosed brace', token=opening_brace)
        tokens.pop(0)
        return filters