blob: f3c5331b85ffacf3ee506537766a819be2c65c1f [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080028import bpf
29
# A single lexical token, together with enough position information
# (file name, full text of the line, 1-based line number and 0-based column)
# to produce a useful diagnostic.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# The alternatives are tried in the order in which they are listed, so the
# keyword-like tokens must appear before IDENTIFIER. Keywords that are made of
# letters are anchored with \b so that they only match whole words: without
# the anchors, an identifier that merely starts with a keyword (e.g. the
# `inotify_init` syscall, which starts with the `in` operator) would be split
# into a keyword token followed by a truncated identifier.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default'),
    ('INCLUDE', r'@include'),
    ('FREQUENCY', r'@frequency'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+'),
    ('RETURN', r'\breturn\b'),
    # The longer `kill-*` spellings must precede plain `kill`.
    ('ACTION', r'\b(?:allow|kill-process|kill-thread|kill|trap|trace|log)\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# One named group per token type; `match.lastgroup` identifies the type.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
65
66
class ParseException(Exception):
    """Signals that a policy file could not be parsed.

    The exception text has three lines: a `file(line:column): message`
    header, the offending source line, and a row of carets placed under the
    offending text.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        # A token carries its own position, which takes precedence over the
        # `line` / `line_number` arguments.
        if token:
            line, line_number = token.line, token.line_number
            start, width = token.column, len(token.value)
        else:
            # Without a token, point just past the end of the line.
            start, width = len(line), 1

        header = ('%s(%d:%d): %s') % (filename, line_number, start + 1,
                                      message)
        source_row = '\n %s' % line
        caret_row = '\n %s%s' % (' ' * start, '^' * width)
        super().__init__(header + source_row + caret_row)
92
93
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    Tracks the file name plus the text and 1-based number of the line that
    was tokenized most recently, so that errors raised later on can still
    point at a position in the input.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        If `token` is given, the error points at that token; otherwise it
        points at the end of the line that was tokenized last.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield one list of tokens per logical line of `lines`.

        `lines` is any iterable of strings (e.g. an open text file). A line
        that ends with a backslash is continued on the next line, and the
        tokens of all the physical lines involved are yielded together.
        Whitespace, comments and the continuation markers themselves are
        omitted, and logical lines without any tokens are skipped.

        Raises a ParseException if some part of a line is not a valid token.
        """
        tokens = []

        for line_number, line in enumerate(lines, 1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    # The regex skipped over some text it could not match.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                # Unmatched text at the very end of the line.
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line: hand over what was
            # accumulated instead of silently dropping the last statement.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800169
170
Atom = collections.namedtuple('Atom', 'argument_index op value')
"""One comparison of a syscall argument against a value."""

Filter = collections.namedtuple('Filter', 'expression action')
"""A parsed DNF filter expression paired with the action it triggers.

The expression is in Disjunctive Normal Form and is stored as a list of lists:
the outer list holds the disjunctions, each inner list holds the Atoms of one
conjunction.
"""

Syscall = collections.namedtuple('Syscall', 'name number')
"""A system call: its name and its number."""

ParsedFilterStatement = collections.namedtuple('ParsedFilterStatement',
                                               'syscalls filters')
"""What a single filter statement parses into.

A statement names a list of syscalls and a list of filters; the filters are
evaluated in order whenever any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple('FilterStatement',
                                         'syscall frequency filters')
"""All the filters that apply to one particular syscall.

Maps a single syscall to the filters that are evaluated sequentially for it.
The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple('ParsedPolicy',
                                      'default_action filter_statements')
"""What a whole minijail .policy file parses into."""
204
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800205
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    This is a recursive-descent parser: every method consumes the tokens it
    recognizes from the front of the `tokens` list it is given (the list is
    modified in place) and reports problems through the ParserState of the
    file that is currently being read, which raises a ParseException.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None):
        """Create a parser.

        Args:
            arch: description of the target architecture. The parser reads
                its `arch_name`, `constants`, `syscalls`, `syscall_groups`,
                `max_unsigned` and `min_signed` attributes and calls its
                `truncate_word()` method.
            kill_action: the action that the `kill` keyword stands for. It is
                also the default action until a `@default` statement is seen.
            include_depth_limit: the maximum depth of the parser-state stack
                (which grows by one per nested `@include`).
            override_default_action: if truthy, used as the default action
                regardless of any `@default` statement.
        """
        # Stack of the files being parsed; the innermost @include is last.
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch

    @property
    def _parser_state(self):
        """The ParserState of the file that is currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the value of a named or numeric constant `token`.

        Negative values are converted to their unsigned representation.
        """
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                # E.g. `0777`: accepted by the tokenizer, but not by int().
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one optionally complemented constant; `tokens` is non-empty."""
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.

        Returns the combined value as a non-negative integer.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached when there were no tokens at all, or when nothing
            # follows a trailing `|`.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN op value` comparison into an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # Strip the `arg` prefix to get the argument index.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse a conjunction of atoms and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns a list of clauses (the disjunction), where every clause is a
        list of Atoms (a conjunction).
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of a `@default` statement.

        Only the actions listed in the grammar above are accepted; any other
        action is rejected as permissive.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object.

        The literal `1` is accepted as a synonym of `allow`, and `kill` maps
        to the `kill_action` this parser was created with.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one Filter; an expression without an action means allow."""
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                action = bpf.Allow()
            return Filter(argument_expression, action)
        # Anything else must be an unconditional action.
        return Filter(None, self.parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=value[,value...]` and return (key, list of values)."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a dict of key -> values."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            # Remember where the pair starts, to report duplicate keys.
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall (or syscall group) name and its optional metadata.

        Returns a tuple of Syscall objects: one for a plain syscall, several
        for a `group@namespace` name, and none at all if the metadata
        restricts the descriptor to architectures other than the current one.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` is both an action keyword and the name of a syscall. The
        # tokenizer always reports it as an ACTION, so it has to be accepted
        # explicitly here.
        is_kill_syscall = (syscall_descriptor.type == 'ACTION'
                           and syscall_descriptor.value == 'kill')
        if syscall_descriptor.type != 'IDENTIFIER' and not is_kill_syscall:
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if ('arch' in metadata
                    and self._arch.arch_name not in metadata['arch']):
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            return tuple(
                Syscall(name, self._arch.syscalls[name])
                for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None if none of the syscalls in the statement apply to the
        current architecture. The statement is still parsed in full in that
        case, so syntax errors in it are reported.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an `@include` and return the statements of the included file.

        Relative paths are resolved against the directory of the file that
        contains the statement.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file made of `syscall-descriptor : number` lines.

        Returns a mapping from Syscall to the sum of the numbers listed for
        it in this file.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscalls = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    try:
                        number_value = int(number.value, base=0)
                    except ValueError:
                        # E.g. `0777`: accepted by the tokenizer, but not by
                        # int(). Report it with its position instead of
                        # letting a bare ValueError escape.
                        self._parser_state.error(
                            'invalid number', token=number)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall in syscalls:
                        frequency_mapping[syscall] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                     ;
    def _parse_frequency_statement(self, tokens):
        """Parse a `@frequency` statement and return the file's mapping.

        Relative paths are resolved against the directory of the file that
        contains the statement.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse a `@default` statement and return its action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file, following its `@include` statements.

        Returns the list of ParsedFilterStatements found. `@frequency` and
        `@default` statements are recorded on the parser as a side effect.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    # tokenize() never yields an empty list.
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        frequencies = self._parse_frequency_statement(tokens)
                        for syscall, frequency in frequencies.items():
                            self._frequency_mapping[syscall] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a file and return the resulting ParsedPolicy.

        The statements of the file (and of everything it includes) are
        collapsed into one FilterStatement per syscall, in order of first
        appearance. Every FilterStatement ends with exactly one unconditional
        action: the default action is appended where the policy has none.

        Raises a ParseException if the policy is invalid.
        """
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = self._parse_policy_file(filename)
        except RecursionError:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list.
        syscall_filter_mapping = {}
        filter_statements = []
        for syscalls, filters in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                syscall_filter_mapping[syscall].filters.extend(filters)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename,
                    line=self._parser_states[-1].line)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)