#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080028import bpf
29
# A lexical token plus the position it was read from.
#
# Fields:
#   type:     name of the token pattern that matched, or 'INVALID' for text
#             that no pattern matched.
#   value:    the matched text.
#   filename: name of the file the token came from.
#   line:     number of the line the token was read from.
#   column:   zero-based offset of the token within that line.
#
# The typename matches the name the class is bound to so that the repr is
# accurate and instances can be pickled.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'column'])
32
# The token types of a Minijail policy file line, as (name, regex) pairs.
#
# The patterns are joined into a single alternation, so order matters: at any
# position the first alternative that matches wins. Keyword-like patterns
# (`in`, `arg<N>`, `return` and the action names) are anchored with `\b` so
# that they never match a prefix of a longer identifier; without the anchors
# `inotify_init` would be split into the operator `in` followed by the
# identifier `otify_init`.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    # The two-character operators must precede their one-character prefixes.
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    # `kill-process` and `kill-thread` must be tried before plain `kill`.
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
# One named group per token type; `match.lastgroup` yields the token type.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
62
63
class ParseException(Exception):
    """Error raised when a policy cannot be parsed.

    The exception message consists of a `filename(line:column): message`
    header, the offending source line, and a row of carets underneath the
    offending span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, line, line_number=1, token=None):
        """Build the diagnostic message.

        Args:
            message: Description of the problem.
            filename: Name of the file being parsed.
            line: Text of the line on which the problem was found.
            line_number: Number of that line, as shown in the header.
            token: Optional token to underline. Without one, a single caret
                is placed just past the end of the line.
        """
        column, length = ((token.column, len(token.value)) if token else
                          (len(line), 1))
        header = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                      message)
        source_line = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source_line + marker)
81
82
class ParserState:
    """Tracks the file and line currently being parsed.

    Keeping the file name, the current line text and its number in one place
    lets errors be reported with a precise source location.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """The name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """The text of the line being processed."""
        return self._line

    @property
    def line_number(self):
        """The number of the line being processed."""
        return self._line_number

    def set_line(self, line):
        """Advance to the next line, whose text is `line`."""
        self._line_number += 1
        self._line = line

    def error(self, message, token=None):
        """Raise a ParseException for the current line.

        If `token` is given, it is passed on so that it can be highlighted in
        the diagnostic.
        """
        raise ParseException(message, self.filename, self.line,
                             self.line_number, token)

    def tokenize(self):
        """Split the current line into a list of Tokens.

        Whitespace and comments are dropped. Any stretch of text that no
        token pattern matches is reported through error() as an
        'invalid token'.
        """
        text = self.line
        found = []
        cursor = 0
        for match in _TOKEN_RE.finditer(text):
            begin, end = match.span()
            if begin != cursor:
                # Something between the previous match and this one was
                # skipped by the regex, so it is not a valid token.
                skipped = Token('INVALID', text[cursor:begin], self.filename,
                                self.line_number, cursor)
                self.error('invalid token', token=skipped)
            cursor = end

            kind = match.lastgroup
            # Filter these out once, here, so that no caller has to.
            if kind not in ('WHITESPACE', 'COMMENT'):
                found.append(
                    Token(kind, match.group(), self.filename,
                          self.line_number, begin))
        if cursor != len(text):
            # Trailing text after the last match.
            trailing = Token('INVALID', text[cursor:], self.filename,
                             self.line_number, cursor)
            self.error('invalid token', token=trailing)
        return found
142
143
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))
"""One comparison of a syscall argument against a value."""

Filter = collections.namedtuple('Filter', ('expression', 'action'))
"""A parsed filter expression together with the action it triggers.

The expression is in Disjunctive Normal Form: an outer list whose entries are
OR-ed together, each entry being an inner list of Atoms that are AND-ed
together. An unconditional filter has None as its expression.
"""

Syscall = collections.namedtuple('Syscall', ('name', 'number'))
"""A system call, identified by name and number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters'))
"""What parsing one filter statement produces.

A statement names one or more syscalls and carries the list of filters that
are evaluated, in order, when any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""All the filters that apply to a single syscall.

The filters are evaluated in order, and the last one is always an
unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""What parsing a minijail .policy file produces."""
177
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800178
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    The parser is a hand-written recursive-descent parser. Every `parse_*` /
    `_parse_*` method receives the list of remaining tokens for the current
    line, consumes the tokens it recognizes from the front of that list (the
    list is modified in place), and returns the parsed value. Problems are
    reported by raising ParseException through the current ParserState.
    """

    def __init__(self, arch, *, kill_action, include_depth_limit=10):
        """Create a parser.

        Args:
            arch: Architecture description. The parser reads its
                `constants` and `syscalls` mappings, its `max_unsigned` and
                `min_signed` bounds, and calls its `truncate_word()`.
            kill_action: Action returned for the `kill` keyword; it is also
                used as the policy's default action.
            include_depth_limit: Maximum size of the parser-state stack;
                reaching it on an `@include` is an error.
        """
        # The stack always holds this sentinel at the bottom; one state is
        # pushed per file while that file is being parsed.
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._arch = arch

    @property
    def _parser_state(self):
        """The state of the file currently being parsed (top of the stack)."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Convert one IDENTIFIER or NUMERIC_CONSTANT token to an integer.

        Identifiers are looked up in the architecture's constant table.
        Negative values are converted to their unsigned representation.
        """
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                # base=0 makes int() honor the 0x / 0o prefixes.
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one, possibly complemented and/or parenthesized, constant.

        `tokens` must not be empty.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.

        Returns:
            The bitwise OR of all the parsed constants.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached when the tokens run out before a constant was read,
            # i.e. on empty input or right after a trailing '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN op value` comparison into an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like `arg<N>`; strip the `arg` prefix.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by `&&` and return them as a list of Atoms."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns:
            A list of clauses, one per `||`-separated alternative. Each
            clause is itself a list of the Atoms that were joined by `&&`.
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def _parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object."""
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                # Plain `kill` maps to whatever the parser was configured
                # with.
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            # The only number that is a valid action is `1`, meaning allow.
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter and return it as a Filter.

        An argument expression without an explicit action allows the call.
        A bare action yields a Filter whose expression is None.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self._parse_action(tokens)
            else:
                action = bpf.Allow()
            return Filter(argument_expression, action)
        return Filter(None, self._parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | libc-function , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse one syscall name and return a 1-tuple holding its Syscall."""
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` is both an action keyword and a syscall name, and the
        # tokenizer gives actions precedence over identifiers, so the `kill`
        # syscall shows up here as an ACTION token.
        is_kill_syscall = (syscall_descriptor.type == 'ACTION'
                           and syscall_descriptor.value == 'kill')
        if syscall_descriptor.type != 'IDENTIFIER' and not is_kill_syscall:
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        # TODO(lhchavez): Support libc function names.
        # TODO(lhchavez): Support metadata.
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None if the statement ends up naming no syscalls.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an `@include` line and return the included statements.

        The path is resolved relative to the directory of the file that
        contains the `@include`.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_policy_file(self, filename):
        """Parse `filename` and return its list of ParsedFilterStatements.

        Statements from `@include`d files are spliced in at the point of
        inclusion. A ParserState for the file is pushed for the duration of
        the call and popped again even if parsing fails.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            # Be explicit about the encoding so that parsing does not depend
            # on the locale of the machine running the parser.
            with open(filename, encoding='utf-8') as policy_file:
                for line in policy_file:
                    self._parser_state.set_line(line.rstrip())
                    tokens = self._parser_state.tokenize()

                    if not tokens:
                        # Allow empty lines.
                        continue

                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    else:
                        statement = self.parse_filter_statement(tokens)
                        # parse_filter_statement() may return None; such a
                        # statement contributes nothing and must not reach
                        # parse_file(), which unpacks every entry.
                        if statement is not None:
                            statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a policy file and return the resulting ParsedPolicy.

        All the statements for a syscall are merged into a single
        FilterStatement, in the order in which the syscalls first appear. A
        syscall whose filters do not end in an unconditional action gets the
        default action appended.

        Raises:
            ParseException: if the file is malformed, if the recursion limit
                is exceeded, or if a syscall has any filter after its first
                unconditional action.
        """
        try:
            statements = self._parse_policy_file(filename)
        except RecursionError:
            raise ParseException('recursion limit exceeded', filename,
                                 self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list.
        syscall_filter_mapping = {}
        filter_statements = []
        for syscalls, filters in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(FilterStatement(syscall, 1, []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                syscall_filter_mapping[syscall].filters.extend(filters)
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                # Anything after an unconditional action can never run.
                raise ParseException(('Syscall %s (number %d) already had '
                                      'an unconditional action applied') %
                                     (filter_statement.syscall.name,
                                      filter_statement.syscall.number),
                                     filename, self._parser_states[-1].line)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=self._default_action))
        return ParsedPolicy(self._default_action, filter_statements)