blob: bd59a45da2e62d9875cdc35b1b621ab0485b785e [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Matt Delcoa12687b2020-02-07 17:12:47 -080028try:
29 import bpf
30except ImportError:
31 from minijail import bpf
32
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080033
# A single lexical token, together with enough location information (file,
# full source line, 1-based line number, 0-based column) to report errors.
Token = collections.namedtuple(
    'Token', 'type value filename line line_number column')

# Ordered (token type, regex) pairs used to tokenize one line of a Minijail
# policy file. The order is significant: the pairs are joined into a single
# alternation, so earlier entries win over later ones (e.g. keywords such as
# `return` and the action names are tried before the generic IDENTIFIER).
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('DENYLIST', r'@denylist$'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    # Each action keyword must match as a whole word; `kill-process` and
    # `kill-thread` are listed before `kill` so they are tried first.
    ('ACTION', '|'.join(r'\b%s\b' % keyword for keyword in (
        'allow',
        'kill-process',
        'kill-thread',
        'kill',
        'trap',
        'trace',
        'log',
        'user-notify',
    ))),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)

# One named group per token type; `match.lastgroup` tells which one matched.
_TOKEN_RE = re.compile('|'.join(
    '(?P<{}>{})'.format(name, pattern)
    for name, pattern in _TOKEN_SPECIFICATION))
73
74
class ParseException(Exception):
    """Raised when a policy cannot be parsed.

    The exception text has three rows: a `filename(line:column): message`
    header, the offending source line, and a row of carets underneath the
    offending span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        if token:
            # A token knows exactly where it came from, so it overrides the
            # explicit `line` / `line_number` arguments.
            line, line_number = token.line, token.line_number
            caret_offset, caret_count = token.column, len(token.value)
        else:
            # Without a token, point one caret just past the end of the line.
            caret_offset, caret_count = len(line), 1

        header = '%s(%d:%d): %s' % (filename, line_number, caret_offset + 1,
                                    message)
        source_row = '\n %s' % line
        caret_row = '\n %s%s' % (' ' * caret_offset, '^' * caret_count)
        super().__init__(header + source_row + caret_row)
100
101
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    Tracks the file name plus the text and 1-based number of the line that
    is currently being tokenized, so that errors can point at it.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        If `token` is given, the exception points at that token; otherwise it
        points at the end of the current line.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield one list of Tokens per logical line of `lines`.

        A physical line that ends with a backslash is joined with the
        following line(s) into a single logical line. Whitespace, comments
        and the continuation marker itself are not emitted, and logical
        lines without any token are skipped. Each yielded list is a fresh
        copy that the caller may modify.

        Args:
            lines: An iterable of strings, one per physical line (e.g. an
                open text file).

        Raises:
            ParseException: if part of a line cannot be matched by any token
                pattern.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    # finditer() skipped over text nothing could match.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                # Unmatched text at the very end of the line.
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Emit what was gathered
            # instead of silently discarding the final statement.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800177
178
# A single boolean comparison within a filter expression.
Atom = collections.namedtuple('Atom', 'argument_index op value')

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists, one for disjunctions and the inner one for conjunctions.
# The elements of the inner list are Atoms.
Filter = collections.namedtuple('Filter', 'expression action')

# A system call.
Syscall = collections.namedtuple('Syscall', 'name number')

# The result of parsing a filter statement.
#
# Statements have a list of syscalls, and an associated list of filters that
# will be evaluated sequentially when any of the syscalls is invoked.
ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', 'syscalls filters token')

# The filter list for a particular syscall.
#
# This is a mapping from one syscall to a list of filters that are evaluated
# sequentially. The last filter is always an unconditional action.
FilterStatement = collections.namedtuple(
    'FilterStatement', 'syscall frequency filters')

# The result of parsing a minijail .policy file.
ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', 'default_action filter_statements')
212
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800213
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800214# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    The grammar rule implemented by each parsing method is given in the
    comment right above it. All parsing methods consume tokens from the
    front of the token list they are given, and report problems by raising
    ParseException through the current ParserState.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None,
                 denylist=False,
                 ret_log=False):
        """Create a parser.

        Args:
            arch: Description of the target architecture. The parser reads
                its `constants`, `syscalls`, `syscall_groups`, `arch_name`,
                `max_unsigned` and `min_signed` attributes and calls its
                `truncate_word()` method.
            kill_action: The action produced by the `kill` keyword. It is
                also the initial default action unless `denylist` is set.
            include_depth_limit: An @include is rejected once the stack of
                parser states (which also counts the initial in-memory
                state) holds this many entries.
            override_default_action: If truthy, parse_file() uses it instead
                of the default action derived from the policy.
            denylist: Parse denylist policies: the initial default action is
                bpf.Allow(), every action must be a `return`, and every
                policy file must contain `@denylist`.
            ret_log: If true, `return <value>` actions produce bpf.Log()
                instead of bpf.ReturnErrno().
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        if denylist:
            self._default_action = bpf.Allow()
        else:
            self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch
        self._denylist = denylist
        self._ret_log = ret_log

    @property
    def _parser_state(self):
        # The state of the file currently being parsed (innermost include).
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Drops either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.

        Returns:
            The bitwise OR of all the parsed constants, as an integer.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached only when the tokens run out without a `break`: either
            # there was nothing to parse or the input ended right after '|'.
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like `arg<N>`; strip the `arg` prefix.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse a argument expression in Disjunctive Normal Form.

        Returns:
            A list of clauses, one per `||`-separated alternative. Each
            clause is itself a list of the Atoms joined by `&&`.
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                | 'user-notify'
    #                ;
    def _parse_default_action(self, tokens):
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        if action_token.value == 'user-notify':
            return bpf.UserNotify()
        # The remaining ACTION keywords are not accepted as a default.
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the matching bpf action object.

        The literal `1` is accepted as a synonym of `allow`. In denylist
        mode only `return` actions are accepted. When the parser was created
        with `ret_log`, the value following `return` is consumed without
        being interpreted and bpf.Log() is returned instead of
        bpf.ReturnErrno().
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        # denylist policies must specify a return for every line.
        if self._denylist:
            if action_token.type != 'RETURN':
                # Point the diagnostic at the offending action.
                self._parser_state.error(
                    'invalid denylist policy', token=action_token)

        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            if self._ret_log:
                tokens.pop(0)
                return bpf.Log()
            else:
                return bpf.ReturnErrno(
                    self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               | '!','(', argument-expression, [ ';', action ], ')'
    #               ;
    def _parse_single_filter(self, tokens):
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                # An expression without an explicit action allows the call.
                action = bpf.Allow()
            return Filter(argument_expression, action)
        else:
            return Filter(None, self.parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        # Returns an iterable of Syscall objects; it is empty when the
        # metadata restricts the descriptor to other architectures.
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a
        # valid action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
                syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if ('arch' in metadata and
                    self._arch.arch_name not in metadata['arch']):
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            return (Syscall(name, self._arch.syscalls[name])
                    for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when none of the listed syscalls applies (for example
        because all of them are restricted to another architecture).
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error
        # location purposes.
        colon_token = tokens.pop(0)
        # The filter is parsed (and validated) even if the statement ends up
        # being discarded below.
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        # Paths are resolved relative to the file containing the @include.
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        # Parses lines of the form `syscall-descriptor : number` and returns
        # a mapping from Syscall to the sum of the numbers given for it.
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    # Reject trailing garbage rather than ignoring it, the
                    # same way policy files do.
                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])

                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                      ;
    def _parse_frequency_statement(self, tokens):
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        # Paths are resolved relative to the file containing the statement.
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        # Parses one policy file (following @include statements) and returns
        # the list of ParsedFilterStatements found in it. @frequency and
        # @default statements update the parser's own state instead.
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            denylist_header = False
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        frequencies = self._parse_frequency_statement(tokens)
                        for syscall_number, frequency in frequencies.items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    elif tokens[0].type == 'DENYLIST':
                        tokens.pop(0)
                        if not self._denylist:
                            self._parser_state.error('policy is denylist, but '
                                                     'flag --denylist not '
                                                     'passed in.')
                        else:
                            denylist_header = True
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            if self._denylist and not denylist_header:
                self._parser_state.error('policy must contain @denylist flag to'
                                         ' be compiled with --denylist flag.')
            return statements
        finally:
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a policy file and return a ParsedPolicy.

        All the filters given for the same syscall (possibly across several
        statements and included files) are collapsed into one
        FilterStatement, in order of appearance. A FilterStatement that does
        not end with an unconditional action gets the default action
        appended.

        Raises:
            ParseException: if the file cannot be parsed, or if a syscall
                has further filters after an unconditional action.
        """
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list,
        # remembering the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                # Something follows an unconditional action: report the first
                # such filter and where the unconditional action came from.
                previous_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix)]
                current_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)