blob: 0db0f62f3b8153759800b33bfb36196d9f06973b [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Matt Delcoa12687b2020-02-07 17:12:47 -080028try:
29 import bpf
30except ImportError:
31 from minijail import bpf
32
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080033
# A lexical token together with the position it was read from (file name,
# full text of the line, 1-based line number and 0-based column), so that
# diagnostics can point at the exact offending text.
Token = collections.namedtuple(
    'Token',
    ('type', 'value', 'filename', 'line', 'line_number', 'column'),
)
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080036
# Ordered (token type, pattern) pairs describing everything that may appear on
# a line of a Minijail policy file. The order is significant: the patterns are
# joined into one alternation, and the regex engine takes the first
# alternative that matches at a given position. That is why keywords such as
# ARGUMENT, RETURN and ACTION are listed before the catch-all IDENTIFIER, and
# why '||' (OR) is listed before '|' (BITWISE_OR).
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    # Each action keyword is matched as a whole word. The longer 'kill-*'
    # spellings come before plain 'kill' so that they are tried first.
    ('ACTION',
     '|'.join(r'\b%s\b' % keyword for keyword in (
         'allow',
         'kill-process',
         'kill-thread',
         'kill',
         'trap',
         'trace',
         'log',
         'user-notify',
     ))),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)

# One regex with a named group per token type; after a match,
# `match.lastgroup` is the type of the token that was recognized.
_TOKEN_RE = re.compile('|'.join(
    '(?P<%s>%s)' % (name, pattern) for name, pattern in _TOKEN_SPECIFICATION))
72
73
class ParseException(Exception):
    """Signals that a policy file could not be parsed.

    The exception text consists of a `filename(line:column): message` header,
    followed by the offending source line and, underneath it, a row of carets
    marking the part of the line that is at fault.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        """Build the formatted diagnostic.

        Args:
            message: description of the problem.
            filename: name of the file shown in the header.
            line: text of the offending line; ignored when `token` is given.
            line_number: 1-based line number; ignored when `token` is given.
            token: optional token to blame. When present, its own line,
                line number, column and length determine what is shown and
                where the carets go.
        """
        if not token:
            # Nothing specific to blame: put a single caret right after the
            # end of the line.
            column, length = len(line), 1
        else:
            line, line_number = token.line, token.line_number
            column, length = token.column, len(token.value)

        # Columns are stored 0-based but reported 1-based.
        header = '%s(%d:%d): %s' % (filename, line_number, column + 1,
                                    message)
        source_line = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source_line + marker)
99
100
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    It remembers the file being processed and the line that was most
    recently tokenized, so that errors raised through it carry that position.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception points at `token` when one is given, and at the end of
        the current line otherwise. This method never returns.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def _invalid_token(self, start, end):
        """Raise an 'invalid token' error for the current line's [start, end)."""
        self.error(
            'invalid token',
            token=Token('INVALID', self._line[start:end], self.filename,
                        self._line, self._line_number, start))

    def tokenize(self, lines):
        """Yield the list of tokens of each logical line in `lines`.

        A logical line is one physical line, or several physical lines joined
        by a trailing backslash. Whitespace, comments and the continuation
        marker itself are not reported. Logical lines without any token are
        skipped. Each yielded list is a fresh copy that the caller may
        modify.

        Args:
            lines: an iterable of strings, one per physical line (for
                example an open text file).

        Raises:
            ParseException: if part of a line does not match any known
                token.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for match in _TOKEN_RE.finditer(self._line):
                # finditer() silently skips text it cannot match, so a gap
                # between consecutive matches is unrecognized input.
                if match.start() != last_end:
                    self._invalid_token(last_end, match.start())
                last_end = match.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if match.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(match.lastgroup, match.group(), self.filename,
                          self._line, self._line_number, match.start()))
            if last_end != len(self._line):
                self._invalid_token(last_end, len(self._line))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Yield a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Hand over what was
            # collected instead of silently dropping it.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800176
177
# A single boolean comparison within a filter expression.
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists, one for disjunctions and the inner one for conjunctions.
# The elements of the inner list are Atoms.
Filter = collections.namedtuple('Filter', ('expression', 'action'))

# A system call.
Syscall = collections.namedtuple('Syscall', ('name', 'number'))

# The result of parsing a filter statement.
#
# Statements have a list of syscalls, and an associated list of filters that
# will be evaluated sequentially when any of the syscalls is invoked.
ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters', 'token'))

# The filter list for a particular syscall.
#
# This is a mapping from one syscall to a list of filters that are evaluated
# sequentially. The last filter is always an unconditional action.
FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))

# The result of parsing a minijail .policy file.
ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
211
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800212
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    The grammar production handled by each method is given in the comment
    above it. Parsing methods consume tokens from the front of the `tokens`
    list they receive, and report problems by raising ParseException through
    the ParserState of the file currently being read.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None):
        """Create a parser.

        Args:
            arch: description of the target architecture. This class reads
                its `constants`, `syscalls`, `syscall_groups`, `arch_name`,
                `max_unsigned`, `min_signed` and `truncate_word` members.
            kill_action: the action produced by the `kill` keyword. It is
                also the default action until an `@default` statement
                replaces it.
            include_depth_limit: size of the parser-state stack at which
                further `@include` statements are rejected.
            override_default_action: when truthy, the default action of the
                parsed policy regardless of `@default` statements.
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch

    @property
    def _parser_state(self):
        """Return the ParserState of the innermost file being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the unsigned integer value of a single constant token."""
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one optionally complemented constant; `tokens` is non-empty."""
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Drops either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached only when the tokens ran out where a constant was
            # expected: no tokens at all, or a dangling '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN op value` comparison and return an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like 'arg<N>'; strip the 'arg' prefix.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by `&&` and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse an argument expression in Disjunctive Normal Form.

        Returns a list of clauses (the disjunction), each of which is a list
        of Atoms (the conjunction).
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                | 'user-notify'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of an `@default` statement.

        Only the actions listed in the grammar above are accepted; any other
        action keyword is rejected as a permissive default action.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        if action_token.value == 'user-notify':
            return bpf.UserNotify()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object.

        The numeric constant 1 is accepted as a synonym of `allow`; any other
        number is an invalid action. `kill` yields the kill action this
        parser was constructed with.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter and return it as a Filter.

        An argument expression without an explicit action allows the call.
        A bare action yields a Filter whose expression is None.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type != 'ARGUMENT':
            return Filter(None, self.parse_action(tokens))
        # Only argument expressions can start with an ARGUMENT token.
        argument_expression = self.parse_argument_expression(tokens)
        if tokens and tokens[0].type == 'SEMICOLON':
            tokens.pop(0)
            action = self.parse_action(tokens)
        else:
            action = bpf.Allow()
        return Filter(argument_expression, action)

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=value[,value...]` and return `(key, [values])`."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a dict of key -> values."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            # Remember where the pair starts so that a duplicate key can be
            # reported at the key rather than after its values.
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall or syscall group and return its Syscalls.

        Returns an iterable of Syscall objects. It is empty when the
        descriptor carries `arch` metadata that does not list the
        architecture this parser was created for.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
                syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if ('arch' in metadata and
                    self._arch.arch_name not in metadata['arch']):
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group, spelled `group@namespace`.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            return (Syscall(name, self._arch.syscalls[name])
                    for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when none of the statement's syscalls applies (for
        example because all of them are restricted to another architecture).
        The filter is still parsed in that case, so syntax errors in it are
        reported regardless.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an `@include` statement and the file it refers to.

        The path is resolved relative to the directory of the file being
        parsed. Returns the statements parsed from the included file.
        """
        if not tokens:
            self._parser_state.error('empty include statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file into a mapping of Syscall -> frequency.

        Every line has the form `syscall-descriptor : number`. Frequencies of
        a syscall that appears more than once are added up.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscalls = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall in syscalls:
                        frequency_mapping[syscall] += number_value
            return frequency_mapping
        finally:
            # Always restore the enclosing file's state, even on errors.
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                     ;
    def _parse_frequency_statement(self, tokens):
        """Parse an `@frequency` statement and the file it refers to.

        The path is resolved relative to the directory of the file being
        parsed. Returns the mapping produced from the frequency file.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse an `@default` statement and return its action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file and return its ParsedFilterStatements.

        `@include` statements are expanded in place. `@frequency` and
        `@default` statements update this parser's frequency mapping and
        default action instead of producing statements.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        frequencies = self._parse_frequency_statement(tokens)
                        for syscall, frequency in frequencies.items():
                            self._frequency_mapping[syscall] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            # Always restore the enclosing file's state, even on errors.
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a file and return the resulting ParsedPolicy.

        The filters of all statements are merged per syscall, in order of
        appearance. A syscall whose filter list does not end in an
        unconditional action gets the default action appended.

        Raises:
            ParseException: on any syntax error, or when a syscall has
                further filters after an unconditional action.
        """
        # Start every parse from a clean slate so that `@frequency` and
        # `@default` statements of a previously parsed file do not leak into
        # this one.
        self._frequency_mapping = collections.defaultdict(int)
        self._default_action = self._kill_action
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list, remembering
        # the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                # Filters after the first unconditional action can never be
                # reached. Blame the first such filter and chain the
                # unconditional one it follows as the cause.
                previous_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix)]
                current_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)