blob: 7179717bef46e6b05ff72a67bc63dfe94a7dbedb [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080028import bpf
29
# A single lexical token together with its origin (file, full line text,
# 1-based line number and 0-based column), so that diagnostics can point at the
# exact location. The typename matches the bound name, like every other
# namedtuple in this module, so that repr() output reads `Token(...)`.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080032
# A regex that can tokenize a Minijail policy file line.
#
# Each entry is a (token type, pattern) pair. The patterns are joined into a
# single alternation of named groups, so ORDER MATTERS: at any position the
# first alternative that matches wins. Keyword-like patterns ('in', 'return'
# and the action names) are anchored with word boundaries (\b) because they are
# tried before IDENTIFIER; without the anchors an identifier that merely starts
# with a keyword (e.g. the syscall 'inotify_init') would be split into a
# keyword token followed by a bogus identifier.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default'),
    ('INCLUDE', r'@include'),
    ('FREQUENCY', r'@frequency'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    # '||' and '&&' must be tried before the single-character '|' and '&'.
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'arg[0-9]+'),
    ('RETURN', r'\breturn\b'),
    # The longer 'kill-*' spellings are listed before plain 'kill' so that they
    # are preferred when both could match.
    ('ACTION',
     r'\b(?:allow|kill-process|kill-thread|kill|trap|trace|log)\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
65
66
class ParseException(Exception):
    """Error raised when a policy file cannot be parsed.

    The exception message has three parts: a `filename(line:column): message`
    header, the offending source line, and a row of `^` characters placed
    under the offending text.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        """Build the formatted diagnostic.

        Args:
            message: Human-readable description of the problem.
            filename: Name of the file reported in the header.
            line: Text of the offending line. Ignored when `token` is given.
            line_number: 1-based line number. Ignored when `token` is given.
            token: Optional token to underline. When provided, its line,
                line number, column and length take precedence. Without it,
                a single caret is placed just past the end of `line`.
        """
        if not token:
            caret_offset, caret_width = len(line), 1
        else:
            line, line_number = token.line, token.line_number
            caret_offset, caret_width = token.column, len(token.value)

        pieces = [
            # Columns are stored 0-based but reported 1-based.
            ('%s(%d:%d): %s') % (filename, line_number, caret_offset + 1,
                                 message),
            '\n %s' % line,
            '\n %s%s' % (' ' * caret_offset, '^' * caret_width),
        ]
        super().__init__(''.join(pieces))
92
93
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    One instance tracks a single input file: its name, plus the text and
    1-based number of the line most recently read by tokenize().
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception points at `token` when one is given, and at the end of
        the current line otherwise. This method never returns.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield one list of tokens per logical statement found in `lines`.

        A statement normally occupies one physical line. A line ending in a
        backslash is continued on the next line, and the tokens of all the
        joined lines are yielded together. Whitespace, comments and the
        continuation marker itself are never included, and lines without any
        remaining tokens yield nothing.

        Args:
            lines: An iterable of strings (e.g. an open text file).

        Yields:
            Non-empty lists of Token. Each list is owned by the caller, which
            is free to modify it.

        Raises:
            ParseException: if some text cannot be matched by any token
                pattern.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                # finditer() silently skips text that no pattern matches, so a
                # gap between consecutive matches is an invalid token.
                if token.start() != last_end:
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            # Unmatched text after the last match is also an invalid token.
            if last_end != len(self._line):
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Hand the list over to the caller and start a fresh one, so
                # that the caller can be free to modify what it received.
                yield tokens
                tokens = []

        # The input ended on a continuation line. Emit what was accumulated
        # rather than silently discarding it, so that the statement is still
        # parsed (and any resulting error reported).
        if tokens:
            yield tokens
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800169
170
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))
"""One comparison inside a filter expression.

Holds the index of the syscall argument being tested, the operator text, and
the already-evaluated integer value it is compared against.
"""

Filter = collections.namedtuple('Filter', ('expression', 'action'))
"""A parsed filter: a DNF expression paired with the action to take.

The expression is in Disjunctive Normal Form and is stored as a list of lists:
the outer list holds the alternatives (joined by ||) and each inner list holds
the Atoms of one conjunction (joined by &&). The expression is None for a
filter that consists solely of an unconditional action.
"""

Syscall = collections.namedtuple('Syscall', ('name', 'number'))
"""A system call, identified by both its name and its number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters'))
"""What parsing one filter statement produces.

A statement names one or more syscalls and carries the list of filters that
are evaluated, in order, whenever any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""All the filters that apply to one particular syscall.

Maps a single syscall (with its frequency) to the filters that are evaluated
sequentially for it. The final filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""What parsing a whole minijail .policy file produces."""
204
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800205
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    This is a hand-written recursive-descent parser. Every `_parse_*` /
    `parse_*` method receives the list of remaining tokens of the current
    statement and consumes, from the front, exactly the tokens of the grammar
    production written in the comment above it. All syntax errors are
    reported through ParserState.error(), which raises ParseException.
    """

    def __init__(self, arch, *, kill_action, include_depth_limit=10):
        """Create a parser.

        Args:
            arch: Description of the target architecture. The parser reads its
                `constants`, `syscalls` and `syscall_groups` mappings, its
                `arch_name`, its `max_unsigned` / `min_signed` bounds and its
                `truncate_word()` method.
            kill_action: The action that the bare `kill` keyword stands for.
                It is also the default action until `@default` overrides it.
            include_depth_limit: Maximum depth of the parser-state stack
                (which grows by one per nested file) before `@include` is
                rejected.
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch

    @property
    def _parser_state(self):
        """The ParserState of the innermost file currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the integer value of an identifier or numeric token.

        Identifiers are looked up in the architecture's constant table.
        Negative values within range are converted to their unsigned
        representation.
        """
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                # base=0 honours the 0x / 0o prefixes. It also rejects decimal
                # literals with leading zeros, hence the ValueError handler.
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one, possibly complemented or parenthesized, constant.

        `tokens` must not be empty on entry.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the constant itself.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.

        Returns:
            The bitwise OR of all the parsed constants, as an integer.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached only when the tokens ran out before a constant could be
            # parsed (no tokens at all, or a trailing '|').
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN <op> value` comparison into an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # Strip the 'arg' prefix to obtain the argument index.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse a conjunction of atoms and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns:
            A list of clauses (the alternatives joined by ||), where each
            clause is itself a list of Atoms (joined by &&).
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of an `@default` statement.

        Only the actions listed in the grammar above are accepted; any other
        action is rejected as permissive.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def _parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object."""
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            # The literal 1 is accepted as a synonym for 'allow'. Any other
            # number falls through to the error below.
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter; the action defaults to Allow when omitted."""
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self._parse_action(tokens)
            else:
                action = bpf.Allow()
            return Filter(argument_expression, action)
        else:
            return Filter(None, self._parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=v1,v2,...` and return `(key, [v1, v2, ...])`."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a key -> value-list dict."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            # Remember where the pair starts so a duplicate key can be
            # reported at the right location.
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall (or syscall group) name with optional metadata.

        Returns:
            A tuple of Syscall objects: one element for a plain syscall, one
            per member for a `group@namespace` name, and an empty tuple when
            the metadata restricts the descriptor to other architectures.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        if syscall_descriptor.type != 'IDENTIFIER':
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            # The arch check comes before the name lookup, so a name unknown
            # to this architecture is not an error when it is tagged for
            # another one.
            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # Materialize the result so that every branch of this method
            # returns the same kind of object (a tuple).
            return tuple(
                Syscall(name, self._arch.syscalls[name])
                for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None (after still parsing the whole statement) when none of
        the named syscalls applies to the current architecture.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse `@include <path>` and return the included file's statements.

        The path is resolved relative to the directory of the file that
        contains the statement.
        """
        if not tokens:
            self._parser_state.error('empty include statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        # One parser state is pushed per file being parsed, so the depth of
        # the stack is the current nesting level.
        if len(self._parser_states) >= self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file into a Syscall -> count mapping.

        Each statement has the form `syscall-descriptor : number`. Counts for
        a syscall that appears more than once are added together.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscalls = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    try:
                        # int(..., base=0) rejects some spellings that the
                        # tokenizer accepts (e.g. '010'); report those as
                        # parse errors too instead of leaking a ValueError.
                        number_value = int(number.value, base=0)
                    except ValueError:
                        self._parser_state.error(
                            'invalid number', token=number)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall in syscalls:
                        frequency_mapping[syscall] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                     ;
    def _parse_frequency_statement(self, tokens):
        """Parse `@frequency <path>` and return that file's mapping.

        The path is resolved relative to the directory of the file that
        contains the statement.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse `@default <action>` and return the action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file and return its ParsedFilterStatements.

        `@include`d files are parsed recursively and their statements are
        spliced in at the point of inclusion. `@frequency` and `@default`
        statements update the parser's state instead of producing statements.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        for syscall_number, frequency in self._parse_frequency_statement(
                                tokens).items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    # Every branch above consumes what it understands, so
                    # anything left over is a syntax error.
                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a policy file and return the resulting ParsedPolicy.

        The statements of the file (and of everything it includes) are
        collapsed into one FilterStatement per syscall, in order of first
        appearance. Every FilterStatement is guaranteed to end with exactly
        one unconditional action; the default action is appended when the
        policy does not provide one.

        Raises:
            ParseException: on any syntax error, when the include chain
                exhausts the interpreter's recursion limit, or when a syscall
                is given more than one unconditional action.
        """
        # Start from a clean slate so that reusing the parser for several
        # files does not leak `@frequency` or `@default` settings between
        # them.
        self._frequency_mapping = collections.defaultdict(int)
        self._default_action = self._kill_action
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError as recursion_error:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line) from recursion_error

        # Collapse statements into a single syscall-to-filter-list.
        syscall_filter_mapping = {}
        filter_statements = []
        for syscalls, filters in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                syscall_filter_mapping[syscall].filters.extend(filters)
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename,
                    line=self._parser_states[-1].line)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=self._default_action))
        return ParsedPolicy(self._default_action, filter_statements)