blob: e335650f8f32e9f33206a2f92e731c36f16182d3 [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
24import re
25
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080026import bpf
27
# A single lexical token produced by ParserState.tokenize().
#
# Fields:
#   type: name of the _TOKEN_SPECIFICATION entry that matched, or 'INVALID'.
#   value: the exact text of the token.
#   filename: name of the file the token was read from.
#   line: line number, as tracked by ParserState, of the token's line.
#   column: 0-based offset of the token's first character within its line.
#
# The typename matches the module-level name so that repr() output and
# pickling refer to the name this type is actually bound to.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'column'])
30
# A regex that can tokenize a Minijail policy file line.
#
# The alternatives are tried in the order listed, so keywords are listed
# before IDENTIFIER. Each keyword is followed by _KEYWORD_END so that it never
# matches just the beginning of a longer identifier: without it a name such as
# `inotify_init` would be split into the operator `in` plus `otify_init`.
_KEYWORD_END = r'(?![a-zA-Z_0-9@])'
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('INCLUDE', r'@include'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|in' + _KEYWORD_END + r'|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+'),
    ('RETURN', r'return' + _KEYWORD_END),
    ('ACTION',
     r'(?:allow|kill-process|kill-thread|kill|trap|trace|log)' + _KEYWORD_END),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
# One named group per token type; `match.lastgroup` is the token type.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
60
61
class ParseException(Exception):
    """Raised when a policy line cannot be parsed.

    The exception text has three lines: a `file(line:column): message` header,
    the offending source line, and a row of carets under the offending span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, line, line_number=1, token=None):
        if token:
            # Underline the whole token.
            column, length = token.column, len(token.value)
        else:
            # No token to blame: place a single caret just past the line end.
            column, length = len(line), 1

        # Columns are reported 1-based.
        location = '%s(%d:%d)' % (filename, line_number, column + 1)
        underline = ' ' * column + '^' * length
        super().__init__(
            '%s: %s\n %s\n %s' % (location, message, line, underline))
79
80
class ParserState:
    """Tracks the parser's current position so errors can be reported well.

    Holds the name of the file being parsed, the text of the current line and
    a running line counter, and splits the current line into tokens.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line_number = 0
        self._line = ''

    @property
    def filename(self):
        """The name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """The text of the line being processed."""
        return self._line

    @property
    def line_number(self):
        """The number of the line being processed."""
        return self._line_number

    def set_line(self, line):
        """Make `line` the current line and advance the line counter."""
        self._line_number += 1
        self._line = line

    def error(self, message, token=None):
        """Raise a ParseException describing a problem on the current line."""
        raise ParseException(
            message,
            self.filename,
            self.line,
            line_number=self.line_number,
            token=token)

    def _token_at(self, kind, start, end):
        """Build a Token of type `kind` from the current line's [start, end)."""
        text = self.line[start:end]
        return Token(kind, text, self.filename, self.line_number, start)

    def tokenize(self):
        """Return the list of tokens found on the current line.

        Whitespace and comments are dropped here so that no other code needs
        to deal with them. Any stretch of the line that matches no token
        pattern is reported through error() as an 'invalid token'.
        """
        found = []
        cursor = 0
        for match in _TOKEN_RE.finditer(self.line):
            begin = match.start()
            if begin != cursor:
                # finditer() skipped over text that no pattern could match.
                self.error(
                    'invalid token',
                    token=self._token_at('INVALID', cursor, begin))
            cursor = match.end()

            kind = match.lastgroup
            if kind not in ('WHITESPACE', 'COMMENT'):
                found.append(self._token_at(kind, begin, cursor))

        line_length = len(self.line)
        if cursor != line_length:
            # Unmatched text at the very end of the line.
            self.error(
                'invalid token',
                token=self._token_at('INVALID', cursor, line_length))
        return found
140
141
Atom = collections.namedtuple('Atom', 'argument_index op value')
"""One boolean comparison inside a filter expression."""

Filter = collections.namedtuple('Filter', 'expression action')
"""A parsed DNF filter expression together with its action.

The expression is in Disjunctive Normal Form and is therefore stored as two
levels of lists: the outer list holds the disjunctions and each inner list
holds the conjunctions. The items of the inner lists are Atoms.
"""

Syscall = collections.namedtuple('Syscall', 'name number')
"""A system call."""

ParsedFilterStatement = collections.namedtuple('ParsedFilterStatement',
                                               'syscalls filters')
"""What parsing a filter statement produces.

A statement carries a list of syscalls and a list of filters; the filters are
evaluated sequentially whenever any of those syscalls is invoked.
"""
163
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800164
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800165# pylint: disable=too-few-public-methods
166class PolicyParser:
167 """A parser for the Minijail seccomp policy file format."""
168
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800169 def __init__(self, arch, *, kill_action):
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800170 self._parser_states = [ParserState("<memory>")]
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800171 self._kill_action = kill_action
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800172 self._arch = arch
173
174 @property
175 def _parser_state(self):
176 return self._parser_states[-1]
177
178 # single-constant = identifier
179 # | numeric-constant
180 # ;
181 def _parse_single_constant(self, token):
182 if token.type == 'IDENTIFIER':
183 if token.value not in self._arch.constants:
184 self._parser_state.error('invalid constant', token=token)
185 single_constant = self._arch.constants[token.value]
186 elif token.type == 'NUMERIC_CONSTANT':
187 try:
188 single_constant = int(token.value, base=0)
189 except ValueError:
190 self._parser_state.error('invalid constant', token=token)
191 else:
192 self._parser_state.error('invalid constant', token=token)
193 if single_constant > self._arch.max_unsigned:
194 self._parser_state.error('unsigned overflow', token=token)
195 elif single_constant < self._arch.min_signed:
196 self._parser_state.error('signed underflow', token=token)
197 elif single_constant < 0:
198 # This converts the constant to an unsigned representation of the
199 # same value, since BPF only uses unsigned values.
200 single_constant = self._arch.truncate_word(single_constant)
201 return single_constant
202
203 # constant = [ '~' ] , '(' , value , ')'
204 # | [ '~' ] , single-constant
205 # ;
206 def _parse_constant(self, tokens):
207 negate = False
208 if tokens[0].type == 'BITWISE_COMPLEMENT':
209 negate = True
210 tokens.pop(0)
211 if not tokens:
212 self._parser_state.error('empty complement')
213 if tokens[0].type == 'BITWISE_COMPLEMENT':
214 self._parser_state.error(
215 'invalid double complement', token=tokens[0])
216 if tokens[0].type == 'LPAREN':
217 last_open_paren = tokens.pop(0)
218 single_value = self.parse_value(tokens)
219 if not tokens or tokens[0].type != 'RPAREN':
220 self._parser_state.error(
221 'unclosed parenthesis', token=last_open_paren)
222 else:
223 single_value = self._parse_single_constant(tokens[0])
224 tokens.pop(0)
225 if negate:
226 single_value = self._arch.truncate_word(~single_value)
227 return single_value
228
229 # value = constant , [ { '|' , constant } ]
230 # ;
231 def parse_value(self, tokens):
232 """Parse constants separated bitwise OR operator |.
233
234 Constants can be:
235
236 - A number that can be parsed with int(..., base=0)
237 - A named constant expression.
238 - A parenthesized, valid constant expression.
239 - A valid constant expression prefixed with the unary bitwise
240 complement operator ~.
241 - A series of valid constant expressions separated by bitwise
242 OR operator |.
243
244 If there is an error parsing any of the constants, the whole process
245 fails.
246 """
247
248 value = 0
249 while tokens:
250 value |= self._parse_constant(tokens)
251 if not tokens or tokens[0].type != 'BITWISE_OR':
252 break
253 tokens.pop(0)
254 else:
255 self._parser_state.error('empty constant')
256 return value
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800257
258 # atom = argument , op , value
259 # ;
260 def _parse_atom(self, tokens):
261 if not tokens:
262 self._parser_state.error('missing argument')
263 argument = tokens.pop(0)
264 if argument.type != 'ARGUMENT':
265 self._parser_state.error('invalid argument', token=argument)
266
267 if not tokens:
268 self._parser_state.error('missing operator')
269 operator = tokens.pop(0)
270 if operator.type != 'OP':
271 self._parser_state.error('invalid operator', token=operator)
272
273 value = self.parse_value(tokens)
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800274 argument_index = int(argument.value[3:])
275 if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
276 self._parser_state.error('invalid argument', token=argument)
277 return Atom(argument_index, operator.value, value)
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800278
279 # clause = atom , [ { '&&' , atom } ]
280 # ;
281 def _parse_clause(self, tokens):
282 atoms = []
283 while tokens:
284 atoms.append(self._parse_atom(tokens))
285 if not tokens or tokens[0].type != 'AND':
286 break
287 tokens.pop(0)
288 else:
289 self._parser_state.error('empty clause')
290 return atoms
291
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800292 # argument-expression = clause , [ { '||' , clause } ]
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800293 # ;
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800294 def parse_argument_expression(self, tokens):
295 """Parse a argument expression in Disjunctive Normal Form.
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800296
297 Since BPF disallows back jumps, we build the basic blocks in reverse
298 order so that all the jump targets are known by the time we need to
299 reference them.
300 """
301
302 clauses = []
303 while tokens:
304 clauses.append(self._parse_clause(tokens))
305 if not tokens or tokens[0].type != 'OR':
306 break
307 tokens.pop(0)
308 else:
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800309 self._parser_state.error('empty argument expression')
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800310 return clauses
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800311
312 # action = 'allow' | '1'
313 # | 'kill-process'
314 # | 'kill-thread'
315 # | 'kill'
316 # | 'trap'
317 # | 'trace'
318 # | 'log'
319 # | 'return' , single-constant
320 # ;
321 def _parse_action(self, tokens):
322 if not tokens:
323 self._parser_state.error('missing action')
324 action_token = tokens.pop(0)
325 if action_token.type == 'ACTION':
326 if action_token.value == 'allow':
327 return bpf.Allow()
328 if action_token.value == 'kill':
329 return self._kill_action
330 if action_token.value == 'kill-process':
331 return bpf.KillProcess()
332 if action_token.value == 'kill-thread':
333 return bpf.KillThread()
334 if action_token.value == 'trap':
335 return bpf.Trap()
336 if action_token.value == 'trace':
337 return bpf.Trace()
338 if action_token.value == 'log':
339 return bpf.Log()
340 elif action_token.type == 'NUMERIC_CONSTANT':
341 constant = self._parse_single_constant(action_token)
342 if constant == 1:
343 return bpf.Allow()
344 elif action_token.type == 'RETURN':
345 if not tokens:
346 self._parser_state.error('missing return value')
347 return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
348 return self._parser_state.error('invalid action', token=action_token)
349
350 # single-filter = action
351 # | argument-expression , [ ';' , action ]
352 # ;
353 def _parse_single_filter(self, tokens):
354 if not tokens:
355 self._parser_state.error('missing filter')
356 if tokens[0].type == 'ARGUMENT':
357 # Only argument expressions can start with an ARGUMENT token.
358 argument_expression = self.parse_argument_expression(tokens)
359 if tokens and tokens[0].type == 'SEMICOLON':
360 tokens.pop(0)
361 action = self._parse_action(tokens)
362 else:
363 action = bpf.Allow()
364 return Filter(argument_expression, action)
365 else:
366 return Filter(None, self._parse_action(tokens))
367
368 # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
369 # | single-filter
370 # ;
371 def parse_filter(self, tokens):
372 """Parse a filter and return a list of Filter objects."""
373 if not tokens:
374 self._parser_state.error('missing filter')
375 filters = []
376 if tokens[0].type == 'LBRACE':
377 opening_brace = tokens.pop(0)
378 while tokens:
379 filters.append(self._parse_single_filter(tokens))
380 if not tokens or tokens[0].type != 'COMMA':
381 break
382 tokens.pop(0)
383 if not tokens or tokens[0].type != 'RBRACE':
384 self._parser_state.error('unclosed brace', token=opening_brace)
385 tokens.pop(0)
386 else:
387 filters.append(self._parse_single_filter(tokens))
388 return filters
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800389
390 # syscall-descriptor = syscall-name , [ metadata ]
391 # | libc-function , [ metadata ]
392 # ;
393 def _parse_syscall_descriptor(self, tokens):
394 if not tokens:
395 self._parser_state.error('missing syscall descriptor')
396 syscall_descriptor = tokens.pop(0)
397 if syscall_descriptor.type != 'IDENTIFIER':
398 self._parser_state.error(
399 'invalid syscall descriptor', token=syscall_descriptor)
400 if syscall_descriptor.value not in self._arch.syscalls:
401 self._parser_state.error(
402 'nonexistent syscall', token=syscall_descriptor)
403 # TODO(lhchavez): Support libc function names.
404 # TODO(lhchavez): Support metadata.
405 return (Syscall(syscall_descriptor.value,
406 self._arch.syscalls[syscall_descriptor.value]), )
407
408 # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
409 # ':' , filter
410 # | syscall-descriptor , ':' , filter
411 # ;
412 def parse_filter_statement(self, tokens):
413 """Parse a filter statement and return a ParsedFilterStatement."""
414 if not tokens:
415 self._parser_state.error('empty filter statement')
416 syscall_descriptors = []
417 if tokens[0].type == 'LBRACE':
418 opening_brace = tokens.pop(0)
419 while tokens:
420 syscall_descriptors.extend(
421 self._parse_syscall_descriptor(tokens))
422 if not tokens or tokens[0].type != 'COMMA':
423 break
424 tokens.pop(0)
425 if not tokens or tokens[0].type != 'RBRACE':
426 self._parser_state.error('unclosed brace', token=opening_brace)
427 tokens.pop(0)
428 else:
429 syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
430 if not tokens:
431 self._parser_state.error('missing colon')
432 if tokens[0].type != 'COLON':
433 self._parser_state.error('invalid colon', token=tokens[0])
434 tokens.pop(0)
435 parsed_filter = self.parse_filter(tokens)
436 if not syscall_descriptors:
437 return None
438 return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)