#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Matt Delcoa12687b2020-02-07 17:12:47 -080028try:
29 import bpf
30except ImportError:
31 from minijail import bpf
32
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080033
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -070034Token = collections.namedtuple(
Luis Hector Chavez4228eff2019-12-11 19:07:13 -080035 'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080036
37# A regex that can tokenize a Minijail policy file line.
38_TOKEN_SPECIFICATION = (
39 ('COMMENT', r'#.*$'),
40 ('WHITESPACE', r'\s+'),
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -070041 ('CONTINUATION', r'\\$'),
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080042 ('DEFAULT', r'@default\b'),
43 ('INCLUDE', r'@include\b'),
44 ('FREQUENCY', r'@frequency\b'),
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080045 ('PATH', r'(?:\.)?/\S+'),
46 ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
47 ('COLON', r':'),
48 ('SEMICOLON', r';'),
49 ('COMMA', r','),
50 ('BITWISE_COMPLEMENT', r'~'),
51 ('LPAREN', r'\('),
52 ('RPAREN', r'\)'),
53 ('LBRACE', r'\{'),
54 ('RBRACE', r'\}'),
55 ('RBRACKET', r'\]'),
56 ('LBRACKET', r'\['),
57 ('OR', r'\|\|'),
58 ('AND', r'&&'),
59 ('BITWISE_OR', r'\|'),
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080060 ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080061 ('EQUAL', r'='),
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080062 ('ARGUMENT', r'\barg[0-9]+\b'),
63 ('RETURN', r'\breturn\b'),
64 ('ACTION',
65 r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
66 r'\btrace\b|\blog\b'
67 ),
Luis Hector Chavez524da3b2019-03-05 16:44:08 -080068 ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080069)
70_TOKEN_RE = re.compile('|'.join(
71 r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
72
73
class ParseException(Exception):
    """An exception that is raised when parsing fails.

    The exception message has three lines: a "file(line:column): message"
    header with a 1-based column, the offending source line, and a row of
    carets placed under the offending text.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        """Build the diagnostic message.

        Args:
            message: Description of the problem.
            filename: Name of the file reported in the header.
            line: Source line to show; ignored when a token is given.
            line_number: 1-based line number; ignored when a token is given.
            token: Optional token at fault. When given, its own line, line
                number, column and length locate the error. Otherwise a
                single caret is placed just past the end of `line`.
        """
        if not token:
            # No token to blame: point right after the end of the line.
            column, length = len(line), 1
        else:
            line, line_number = token.line, token.line_number
            column, length = token.column, len(token.value)

        header = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                      message)
        source_line = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source_line + marker)
99
100
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    One instance tracks a single input file: its name, plus the text and
    1-based number of the line most recently read by tokenize().
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception is located at `token` when one is given, and at the
        end of the current line otherwise.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield one list of Tokens for each logical line in `lines`.

        A physical line that ends with a backslash is continued on the next
        physical line; the tokens of all the joined lines are yielded as a
        single list. Whitespace, comment and continuation tokens are left
        out, and logical lines without any remaining token yield nothing.

        Args:
            lines: An iterable of strings, one per physical line. Trailing
                carriage returns and newlines are stripped.

        Yields:
            A fresh list of Tokens that the caller is free to modify.

        Raises:
            ParseException: If some text cannot be matched by any token
                pattern.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for match in _TOKEN_RE.finditer(self._line):
                if match.start() != last_end:
                    # finditer() skipped over text that no pattern matches.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:match.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = match.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if match.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(match.lastgroup, match.group(), self.filename,
                          self._line, self._line_number, match.start()))
            if last_end != len(self._line):
                # Unmatched text after the last token of the line.
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Hand over what was
            # collected instead of silently dropping the statement.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800176
177
Atom = collections.namedtuple(
    'Atom',
    ('argument_index', 'op', 'value'),
)
"""One boolean comparison (argument, operator, value) of a filter expression."""

Filter = collections.namedtuple(
    'Filter',
    ('expression', 'action'),
)
"""A parsed DNF filter expression paired with the action it triggers.

The expression is in Disjunctive Normal Form, so it is a list of lists: the
outer list holds the disjunctions, each inner list holds the Atoms of one
conjunction.
"""

Syscall = collections.namedtuple(
    'Syscall',
    ('name', 'number'),
)
"""A system call, identified by name and number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement',
    ('syscalls', 'filters', 'token'),
)
"""A parsed filter statement.

It holds the syscalls the statement names and the list of filters that are
evaluated in order when any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement',
    ('syscall', 'frequency', 'filters'),
)
"""The filters that apply to one particular syscall.

Maps a single syscall to the list of filters evaluated sequentially for it.
The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy',
    ('default_action', 'filter_statements'),
)
"""What parsing a minijail .policy file produces."""
211
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800212
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    The grammar rule implemented by each method is given in the comment that
    precedes it. The methods that take a list of tokens consume the tokens
    they recognize from the front of that list, and report problems by
    raising ParseException through the ParserState of the file being read.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None):
        """Create a parser.

        Args:
            arch: Architecture description. The parser reads its `constants`,
                `syscalls`, `syscall_groups`, `arch_name`, `max_unsigned` and
                `min_signed` attributes and calls its `truncate_word()`.
            kill_action: The action that the `kill` keyword stands for. It is
                also the default action until an `@default` statement
                replaces it.
            include_depth_limit: Bound on the number of nested parser states
                (the initial "<memory>" state plus one per open file) beyond
                which `@include` is rejected.
            override_default_action: When truthy, used by parse_file() as the
                default action instead of the one from the policy.
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch

    @property
    def _parser_state(self):
        """Return the ParserState of the innermost file being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the unsigned value denoted by a single constant token.

        Identifiers are looked up in the arch constants, numeric constants
        are converted with int(..., base=0). Values outside of the
        [min_signed, max_unsigned] range of the arch are rejected.
        """
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one constant, optionally complemented and/or parenthesized.

        `tokens` must not be empty.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Drop either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached with no tokens at all, or right after a dangling '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN op value` comparison and return an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like 'arg<digits>'; skip the 'arg' prefix.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by && and return them as a list of Atoms."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                   ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns a list of clauses that are ORed together, where each clause
        is a list of Atoms that are ANDed together.
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of an @default statement.

        Only the actions listed in the grammar above are accepted; any other
        action keyword is rejected as a permissive default action.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object.

        `kill` maps to the kill action this parser was created with, and the
        numeric constant 1 is accepted as a synonym of `allow`.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter and return it as a Filter.

        An argument expression that is not followed by `; action` gets the
        allow action. A bare action yields a Filter whose expression is None.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type != 'ARGUMENT':
            return Filter(None, self.parse_action(tokens))
        # Only argument expressions can start with an ARGUMENT token.
        argument_expression = self.parse_argument_expression(tokens)
        if tokens and tokens[0].type == 'SEMICOLON':
            tokens.pop(0)
            action = self.parse_action(tokens)
        else:
            action = bpf.Allow()
        return Filter(argument_expression, action)

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=value[,value...]` into a (key, list of values) tuple."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a dict of key to value list.

        Repeating a key within the block is an error.
        """
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall descriptor and return a tuple of Syscalls.

        The tuple is empty when the metadata restricts the descriptor to
        architectures that do not include the current one, has one entry for
        a plain syscall name, and one entry per member for a syscall group
        (written as `group@namespace`).
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
                syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            # This check comes before the syscall lookup, so a descriptor for
            # another arch is skipped without checking that it exists here.
            if ('arch' in metadata and
                    self._arch.arch_name not in metadata['arch']):
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # Built eagerly so that every branch returns a tuple.
            return tuple(
                Syscall(name, self._arch.syscalls[name])
                for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when none of the syscall descriptors of the statement
        applies to the current architecture. The filter is still parsed in
        that case.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an @include statement and the file that it names.

        The path is resolved relative to the directory of the file being
        parsed. Returns the list of statements of the included file.
        """
        if not tokens:
            self._parser_state.error('empty include statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        # '>=' rather than '==' so that limits below the current depth are
        # enforced too instead of never matching.
        if len(self._parser_states) >= self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file into a mapping from Syscall to frequency.

        Every logical line has the form `syscall-descriptor : number`. The
        numbers given for the same syscall are added up.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename, encoding='utf-8') as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscalls = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall in syscalls:
                        frequency_mapping[syscall] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                      ;
    def _parse_frequency_statement(self, tokens):
        """Parse an @frequency statement and the file that it names.

        The path is resolved relative to the directory of the file being
        parsed. Returns the mapping built by _parse_frequency_file().
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse an @default statement and return its action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file and return its ParsedFilterStatements.

        @include statements contribute the statements of the included file,
        @frequency statements add to the frequency mapping of the parser and
        @default statements replace its default action. A ParserState for
        `filename` is pushed for the duration of the call.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename, encoding='utf-8') as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        frequencies = self._parse_frequency_statement(tokens)
                        for syscall, frequency in frequencies.items():
                            self._frequency_mapping[syscall] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a policy file and return the resulting ParsedPolicy.

        The statements of the file (and of the files it includes) are merged
        into one FilterStatement per syscall, in order of first appearance.
        Each filter list is completed with the default action unless it
        already ends with an unconditional action.

        Raises:
            ParseException: If the policy is malformed, if the recursion
                limit is hit while parsing, or if filters follow an
                unconditional action for the same syscall.
        """
        # Start from a clean slate so that an earlier parse_file() call on
        # this parser cannot leak its @frequency or @default statements.
        self._frequency_mapping = collections.defaultdict(int)
        self._default_action = self._kill_action
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError as exc:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line) from exc

        # Collapse statements into a single syscall-to-filter-list, remembering
        # the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                # Something was defined after an unconditional action. Point
                # at that definition, and at the unconditional one before it.
                definitions = syscall_filter_definitions[
                    filter_statement.syscall]
                previous_definition_token = definitions[
                    -len(unconditional_actions_suffix)]
                current_definition_token = definitions[
                    -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)