blob: c7ad163c70a715a8917b0681aadf16a36af305af [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080028import bpf
29
# A lexical token plus the location it was read from, so that diagnostics can
# point at the exact spot in the source.
#   type:        name of the token class that matched ('INVALID' for text
#                that no token class matches).
#   value:       the matched text.
#   filename:    name of the file being tokenized.
#   line:        full text of the line that contains the token.
#   line_number: 1-based number of that line.
#   column:      0-based offset of the token within the line.
# The typename matches the name it is bound to so that repr() is accurate and
# instances can be pickled.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080032
# Ordered (token type, regular expression) pairs that together tokenize one
# line of a Minijail policy file. The order is significant: the alternatives
# are tried from first to last, so keywords such as the actions must come
# before the catch-all IDENTIFIER, and '||' must come before '|'.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)

# One alternation of named groups; the name of the group that matched
# (match.lastgroup) is the token type.
_TOKEN_RE = re.compile('|'.join(
    '(?P<{}>{})'.format(name, pattern)
    for name, pattern in _TOKEN_SPECIFICATION))
68
69
class ParseException(Exception):
    """Error raised when a policy file cannot be parsed.

    The exception text starts with "<filename>(<line>:<column>): <message>"
    (both numbers 1-based), followed by the offending source line and, on
    the line after that, a run of '^' characters under the problem.

    Args:
        message: Description of the problem.
        filename: Name of the file in which the problem was found.
        line: Text of the offending line. Ignored when `token` is given.
        line_number: 1-based number of the offending line. Ignored when
            `token` is given.
        token: Optional token to blame. When given, its line, line number,
            column and length are used; otherwise the marker is a single
            '^' placed just past the end of `line`.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        if token:
            # The token knows precisely where it came from.
            line = token.line
            line_number = token.line_number
            column, length = token.column, len(token.value)
        else:
            column, length = len(line), 1

        header = '%s(%d:%d): %s' % (filename, line_number, column + 1,
                                    message)
        source = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source + marker)
95
96
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    Remembers the file name together with the text and the 1-based number of
    the line most recently handed to the tokenizer, so that errors raised
    without a token can still point at a location.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        Args:
            message: Description of the problem.
            token: Optional token to blame. Without it, the exception points
                at the current line.

        Raises:
            ParseException: always.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def _invalid_token(self, start, end):
        """Report the untokenizable text in the current line's [start, end)."""
        self.error(
            'invalid token',
            token=Token('INVALID', self._line[start:end], self.filename,
                        self._line, self._line_number, start))

    def tokenize(self, lines):
        """Yield one list of Tokens per logical line of `lines`.

        A physical line that ends with a backslash is continued on the next
        one, and the tokens of all the joined lines are yielded together.
        Whitespace, comments and the continuation marker itself are dropped,
        and logical lines without any token are skipped.

        Args:
            lines: Iterable of strings, e.g. an open text file.

        Yields:
            A fresh, non-empty list of Tokens that the caller may modify.

        Raises:
            ParseException: if part of a line cannot be tokenized.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for match in _TOKEN_RE.finditer(self._line):
                # finditer() silently skips text that matches nothing, so a
                # gap between two consecutive matches is invalid input.
                if match.start() != last_end:
                    self._invalid_token(last_end, match.start())
                last_end = match.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if match.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(match.lastgroup, match.group(), self.filename,
                          self._line, self._line_number, match.start()))
            if last_end != len(self._line):
                self._invalid_token(last_end, len(self._line))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Yield a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Hand over what was
            # collected instead of silently discarding it.
            yield tokens
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800172
173
# A single boolean comparison within a filter expression.
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists, one for disjunctions and the inner one for conjunctions.
# The elements of the inner list are Atoms.
Filter = collections.namedtuple('Filter', ('expression', 'action'))

# A system call.
Syscall = collections.namedtuple('Syscall', ('name', 'number'))

# The result of parsing a filter statement.
#
# Statements have a list of syscalls, and an associated list of filters that
# will be evaluated sequentially when any of the syscalls is invoked.
ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters'))

# The filter list for a particular syscall.
#
# This is a mapping from one syscall to a list of filters that are evaluated
# sequentially. The last filter is always an unconditional action.
FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))

# The result of parsing a minijail .policy file.
ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
207
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800208
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    All the parse_*/_parse_* methods that take `tokens` consume the tokens
    they recognize from the front of that list, in place, and report
    problems by raising ParseException.

    Args:
        arch: Description of the target architecture. The parser reads its
            `constants`, `syscalls`, `syscall_groups`, `arch_name`,
            `max_unsigned` and `min_signed` attributes and calls its
            `truncate_word()` method.
        kill_action: Action that the `kill` keyword stands for. It is also
            the default action of a policy that has no `@default` statement.
        include_depth_limit: `@include` is rejected once the stack of files
            being parsed (which also holds one placeholder entry) has this
            many entries.
        override_default_action: If set, replaces the default action of the
            parsed policy, whatever the policy file itself says.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None):
        # The last element is the state of the file currently being parsed.
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch

    @property
    def _parser_state(self):
        """Return the state of the file currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the unsigned value of an identifier or a numeric token."""
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one, optionally complemented, constant.

        `tokens` must not be empty.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.

        Returns:
            The bitwise OR of all the constants.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached when there is nothing to parse at all, or nothing
            # after a '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse one `argN <op> value` comparison and return it as an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like 'arg<digits>'; strip the 'arg' prefix.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by '&&' and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse a argument expression in Disjunctive Normal Form.

        Returns:
            A list with one entry per '||'-separated clause, where every
            entry is the list of the Atoms that the clause joins with '&&'.
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of an `@default` statement.

        Only the actions listed in the grammar above are accepted.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the matching bpf action object.

        `kill` yields the kill action this parser was created with, and the
        number 1 is accepted as a synonym of `allow`.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter: an optional condition plus an action.

        A condition without an explicit action means "allow". A Filter
        whose expression is None is unconditional.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                action = bpf.Allow()
            return Filter(argument_expression, action)
        return Filter(None, self.parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=value[,value...]` into a (key, list of values) pair."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a key -> values dict."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall (group) name and return a tuple of Syscalls.

        The tuple is empty when the metadata restricts the descriptor to
        architectures other than the one being compiled for.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
                syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if ('arch' in metadata and
                    self._arch.arch_name not in metadata['arch']):
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # A tuple, like every other return value of this method.
            return tuple(
                Syscall(name, self._arch.syscalls[name])
                for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when none of the syscalls in the statement apply to
        the current architecture. The statement is fully parsed (and
        validated) even in that case.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse `@include <path>` and return the included statements.

        The path is resolved relative to the directory of the file that
        contains the statement.
        """
        if not tokens:
            self._parser_state.error('empty include statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file and return a Syscall -> frequency mapping.

        Every logical line has the form `syscall-descriptor: number`, and
        the numbers of lines that name the same syscall are added up.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename, encoding='utf-8') as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                     ;
    def _parse_frequency_statement(self, tokens):
        """Parse `@frequency <path>` and return the mapping from that file.

        The path is resolved relative to the directory of the file that
        contains the statement.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse `@default <action>` and return the action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file into a list of ParsedFilterStatements.

        Statements of included files are spliced in at the position of the
        `@include`. `@frequency` and `@default` statements update the state
        of the parser instead of producing statements.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename, encoding='utf-8') as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        frequencies = self._parse_frequency_statement(tokens)
                        for syscall_number, frequency in frequencies.items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a file and return the list of FilterStatements.

        Returns:
            A ParsedPolicy with the default action and one FilterStatement
            per syscall, in order of first appearance. The filter list of
            every FilterStatement ends with an unconditional filter; the
            default action is appended where the policy does not provide
            one.

        Raises:
            ParseException: if the policy is malformed, if a syscall gets
                more than one unconditional action, or if the recursion
                limit of the interpreter is exceeded.
        """
        # Start from a clean slate so that nothing leaks in from a file
        # previously parsed with this same parser.
        self._frequency_mapping = collections.defaultdict(int)
        self._default_action = self._kill_action
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError as error:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line) from error

        # Collapse statements into a single syscall-to-filter-list.
        syscall_filter_mapping = {}
        filter_statements = []
        for syscalls, filters in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                syscall_filter_mapping[syscall].filters.extend(filters)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename,
                    line=self._parser_states[-1].line)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)