blob: 87e1493acad2c443ac2b842c0ec6b77057a0daca [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Matt Delcoa12687b2020-02-07 17:12:47 -080028try:
29 import bpf
30except ImportError:
31 from minijail import bpf
32
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080033
# Regular expressions for C integer literals, one per radix (base).
HEX_REGEX = r'-?0[xX][0-9a-fA-F]+'
OCTAL_REGEX = r'-?0[0-7]+'
DECIMAL_REGEX = r'-?[0-9]+'


# A lexical token plus the location it was read from, kept for diagnostics.
Token = collections.namedtuple(
    'Token', 'type value filename line line_number column')

# (token type, regex) pairs used to tokenize one line of a Minijail policy
# file. Order matters: the pairs are joined into a single alternation, so an
# earlier pattern wins whenever several could match at the same position
# (for instance 'OR' before 'BITWISE_OR' and 'ACTION' before 'IDENTIFIER').
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('DENYLIST', r'@denylist$'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', f'{HEX_REGEX}|{OCTAL_REGEX}|{DECIMAL_REGEX}'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b|\buser-notify\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# One named group per token type; Match.lastgroup tells which one matched.
_TOKEN_RE = re.compile('|'.join(
    f'(?P<{token_type}>{pattern})'
    for token_type, pattern in _TOKEN_SPECIFICATION))
79
80
class ParseException(Exception):
    """Error raised when a policy file cannot be parsed.

    The exception message has three lines: a `filename(line:column): message`
    header with a 1-based column, the offending source line, and a row of
    carets underneath the part of the line the error refers to.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        """Build the formatted error message.

        Args:
            message: description of the problem.
            filename: name of the file in which the problem was found.
            line: text of the offending line; ignored when `token` is given.
            line_number: 1-based line number; ignored when `token` is given.
            token: optional token to underline. Its line, line number, column
                and value length take precedence over the other arguments.
        """
        if token:
            line, line_number = token.line, token.line_number
            column, length = token.column, len(token.value)
        else:
            # Without a token, point one caret just past the end of the line.
            column, length = len(line), 1

        header = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                      message)
        source_line = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source_line + marker)
106
107
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    Tracks the name of the file being tokenized together with the text and the
    1-based number of the most recently read line, so that errors can point at
    the place where parsing stopped.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception points at `token` when one is given, and at the end of
        the current line otherwise.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield the list of tokens of each logical line in `lines`.

        A physical line that ends with a backslash is continued on the next
        one, so the tokens of all the joined lines are yielded as one list.
        Whitespace, comments and the continuation marker itself are dropped,
        and logical lines without any token are skipped.

        Args:
            lines: an iterable of strings, one per physical line.

        Raises:
            ParseException: if part of a line does not match any token.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    # finditer() skipped over text that matches no token.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
                tokens.clear()

        if tokens:
            # The input ended on a continued line. Hand the pending tokens to
            # the caller instead of silently dropping the statement.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800183
184
# A single boolean comparison within a filter expression.
Atom = collections.namedtuple('Atom', 'argument_index op value')

# The result of parsing a DNF filter expression, with its action.
#
# Since the expression is in Disjunctive Normal Form, it is composed of two
# levels of lists: the outer one for disjunctions and the inner one for
# conjunctions. The elements of the inner lists are Atoms.
Filter = collections.namedtuple('Filter', 'expression action')

# A system call.
Syscall = collections.namedtuple('Syscall', 'name number')

# The result of parsing a filter statement.
#
# Statements have a list of syscalls, and an associated list of filters that
# will be evaluated sequentially when any of the syscalls is invoked.
ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', 'syscalls filters token')

# The filter list for a particular syscall.
#
# This is a mapping from one syscall to a list of filters that are evaluated
# sequentially. The last filter is always an unconditional action.
FilterStatement = collections.namedtuple(
    'FilterStatement', 'syscall frequency filters')

# The result of parsing a minijail .policy file.
ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', 'default_action filter_statements')
218
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800219
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800220# pylint: disable=too-few-public-methods
221class PolicyParser:
222 """A parser for the Minijail seccomp policy file format."""
223
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800224 def __init__(self,
225 arch,
226 *,
227 kill_action,
228 include_depth_limit=10,
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000229 override_default_action=None,
Nicole Anderson-Au60f60e22021-09-14 19:56:45 +0000230 denylist=False,
231 ret_log=False):
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800232 self._parser_states = [ParserState("<memory>")]
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800233 self._kill_action = kill_action
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800234 self._include_depth_limit = include_depth_limit
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000235 if denylist:
236 self._default_action = bpf.Allow()
237 else:
238 self._default_action = self._kill_action
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800239 self._override_default_action = override_default_action
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800240 self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800241 self._arch = arch
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000242 self._denylist = denylist
Nicole Anderson-Au60f60e22021-09-14 19:56:45 +0000243 self._ret_log = ret_log
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800244
    @property
    def _parser_state(self):
        """Return the entry on top of the parser state stack."""
        return self._parser_states[-1]
248
249 # single-constant = identifier
250 # | numeric-constant
251 # ;
252 def _parse_single_constant(self, token):
253 if token.type == 'IDENTIFIER':
254 if token.value not in self._arch.constants:
255 self._parser_state.error('invalid constant', token=token)
256 single_constant = self._arch.constants[token.value]
257 elif token.type == 'NUMERIC_CONSTANT':
Denny Huang9a3eaa32021-12-28 07:19:36 +0000258 # As `int(_, 0)` in Python != `strtol(_, _, 0)` in C, to make sure
259 # the number parsing behaves exactly in C, instead of using `int()`
260 # directly, we list out all the possible formats for octal, decimal
261 # and hex numbers, and determine the corresponding base by regex.
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800262 try:
Denny Huang9a3eaa32021-12-28 07:19:36 +0000263 if re.match(HEX_REGEX, token.value):
264 base = 16
265 elif re.match(OCTAL_REGEX, token.value):
266 base = 8
267 elif re.match(DECIMAL_REGEX, token.value):
268 base = 10
269 else:
270 # This should never happen.
271 raise ValueError
272 single_constant = int(token.value, base=base)
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800273 except ValueError:
274 self._parser_state.error('invalid constant', token=token)
275 else:
276 self._parser_state.error('invalid constant', token=token)
277 if single_constant > self._arch.max_unsigned:
278 self._parser_state.error('unsigned overflow', token=token)
279 elif single_constant < self._arch.min_signed:
280 self._parser_state.error('signed underflow', token=token)
281 elif single_constant < 0:
282 # This converts the constant to an unsigned representation of the
283 # same value, since BPF only uses unsigned values.
284 single_constant = self._arch.truncate_word(single_constant)
285 return single_constant
286
287 # constant = [ '~' ] , '(' , value , ')'
288 # | [ '~' ] , single-constant
289 # ;
290 def _parse_constant(self, tokens):
291 negate = False
292 if tokens[0].type == 'BITWISE_COMPLEMENT':
293 negate = True
294 tokens.pop(0)
295 if not tokens:
296 self._parser_state.error('empty complement')
297 if tokens[0].type == 'BITWISE_COMPLEMENT':
298 self._parser_state.error(
299 'invalid double complement', token=tokens[0])
300 if tokens[0].type == 'LPAREN':
301 last_open_paren = tokens.pop(0)
302 single_value = self.parse_value(tokens)
303 if not tokens or tokens[0].type != 'RPAREN':
304 self._parser_state.error(
305 'unclosed parenthesis', token=last_open_paren)
306 else:
307 single_value = self._parse_single_constant(tokens[0])
308 tokens.pop(0)
309 if negate:
310 single_value = self._arch.truncate_word(~single_value)
311 return single_value
312
313 # value = constant , [ { '|' , constant } ]
314 # ;
315 def parse_value(self, tokens):
316 """Parse constants separated bitwise OR operator |.
317
318 Constants can be:
319
Denny Huang9a3eaa32021-12-28 07:19:36 +0000320 - A number that can be parsed with strtol() in C.
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800321 - A named constant expression.
322 - A parenthesized, valid constant expression.
323 - A valid constant expression prefixed with the unary bitwise
324 complement operator ~.
325 - A series of valid constant expressions separated by bitwise
326 OR operator |.
327
328 If there is an error parsing any of the constants, the whole process
329 fails.
330 """
331
332 value = 0
333 while tokens:
334 value |= self._parse_constant(tokens)
335 if not tokens or tokens[0].type != 'BITWISE_OR':
336 break
337 tokens.pop(0)
338 else:
339 self._parser_state.error('empty constant')
340 return value
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800341
342 # atom = argument , op , value
343 # ;
344 def _parse_atom(self, tokens):
345 if not tokens:
346 self._parser_state.error('missing argument')
347 argument = tokens.pop(0)
348 if argument.type != 'ARGUMENT':
349 self._parser_state.error('invalid argument', token=argument)
350
351 if not tokens:
352 self._parser_state.error('missing operator')
353 operator = tokens.pop(0)
354 if operator.type != 'OP':
355 self._parser_state.error('invalid operator', token=operator)
356
357 value = self.parse_value(tokens)
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800358 argument_index = int(argument.value[3:])
359 if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
360 self._parser_state.error('invalid argument', token=argument)
361 return Atom(argument_index, operator.value, value)
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800362
363 # clause = atom , [ { '&&' , atom } ]
364 # ;
365 def _parse_clause(self, tokens):
366 atoms = []
367 while tokens:
368 atoms.append(self._parse_atom(tokens))
369 if not tokens or tokens[0].type != 'AND':
370 break
371 tokens.pop(0)
372 else:
373 self._parser_state.error('empty clause')
374 return atoms
375
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800376 # argument-expression = clause , [ { '||' , clause } ]
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800377 # ;
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800378 def parse_argument_expression(self, tokens):
379 """Parse a argument expression in Disjunctive Normal Form.
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800380
381 Since BPF disallows back jumps, we build the basic blocks in reverse
382 order so that all the jump targets are known by the time we need to
383 reference them.
384 """
385
386 clauses = []
387 while tokens:
388 clauses.append(self._parse_clause(tokens))
389 if not tokens or tokens[0].type != 'OR':
390 break
391 tokens.pop(0)
392 else:
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800393 self._parser_state.error('empty argument expression')
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800394 return clauses
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800395
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800396 # default-action = 'kill-process'
397 # | 'kill-thread'
398 # | 'kill'
399 # | 'trap'
Luis Héctor Chávez59a64492021-01-03 05:46:47 -0800400 # | 'user-notify'
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800401 # ;
402 def _parse_default_action(self, tokens):
403 if not tokens:
404 self._parser_state.error('missing default action')
405 action_token = tokens.pop(0)
406 if action_token.type != 'ACTION':
407 return self._parser_state.error(
408 'invalid default action', token=action_token)
409 if action_token.value == 'kill-process':
410 return bpf.KillProcess()
411 if action_token.value == 'kill-thread':
412 return bpf.KillThread()
413 if action_token.value == 'kill':
414 return self._kill_action
415 if action_token.value == 'trap':
416 return bpf.Trap()
Luis Héctor Chávez59a64492021-01-03 05:46:47 -0800417 if action_token.value == 'user-notify':
418 return bpf.UserNotify()
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800419 return self._parser_state.error(
420 'invalid permissive default action', token=action_token)
421
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800422 # action = 'allow' | '1'
423 # | 'kill-process'
424 # | 'kill-thread'
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000425 # | 'kill'
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800426 # | 'trap'
427 # | 'trace'
428 # | 'log'
Luis Héctor Chávez59a64492021-01-03 05:46:47 -0800429 # | 'user-notify'
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800430 # | 'return' , single-constant
431 # ;
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800432 def parse_action(self, tokens):
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800433 if not tokens:
434 self._parser_state.error('missing action')
435 action_token = tokens.pop(0)
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000436 # denylist policies must specify a return for every line.
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000437 if self._denylist:
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000438 if action_token.type != 'RETURN':
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000439 self._parser_state.error('invalid denylist policy')
440
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800441 if action_token.type == 'ACTION':
442 if action_token.value == 'allow':
443 return bpf.Allow()
444 if action_token.value == 'kill':
445 return self._kill_action
446 if action_token.value == 'kill-process':
447 return bpf.KillProcess()
448 if action_token.value == 'kill-thread':
449 return bpf.KillThread()
450 if action_token.value == 'trap':
451 return bpf.Trap()
452 if action_token.value == 'trace':
453 return bpf.Trace()
Luis Héctor Chávez59a64492021-01-03 05:46:47 -0800454 if action_token.value == 'user-notify':
455 return bpf.UserNotify()
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800456 if action_token.value == 'log':
457 return bpf.Log()
458 elif action_token.type == 'NUMERIC_CONSTANT':
459 constant = self._parse_single_constant(action_token)
460 if constant == 1:
461 return bpf.Allow()
462 elif action_token.type == 'RETURN':
463 if not tokens:
464 self._parser_state.error('missing return value')
Nicole Anderson-Au60f60e22021-09-14 19:56:45 +0000465 if self._ret_log:
466 tokens.pop(0)
467 return bpf.Log()
468 else:
469 return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800470 return self._parser_state.error('invalid action', token=action_token)
471
472 # single-filter = action
473 # | argument-expression , [ ';' , action ]
Nicole Anderson-Au4820a382021-06-18 20:16:14 +0000474 # | '!','(', argument-expression, [ ';', action ], ')'
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800475 # ;
476 def _parse_single_filter(self, tokens):
477 if not tokens:
478 self._parser_state.error('missing filter')
479 if tokens[0].type == 'ARGUMENT':
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000480 # Only argument expressions can start with an ARGUMENT token.
481 argument_expression = self.parse_argument_expression(tokens)
482 if tokens and tokens[0].type == 'SEMICOLON':
483 tokens.pop(0)
484 action = self.parse_action(tokens)
485 else:
486 action = bpf.Allow()
487 return Filter(argument_expression, action)
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800488 else:
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800489 return Filter(None, self.parse_action(tokens))
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800490
491 # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
492 # | single-filter
493 # ;
494 def parse_filter(self, tokens):
495 """Parse a filter and return a list of Filter objects."""
496 if not tokens:
497 self._parser_state.error('missing filter')
498 filters = []
499 if tokens[0].type == 'LBRACE':
500 opening_brace = tokens.pop(0)
501 while tokens:
502 filters.append(self._parse_single_filter(tokens))
503 if not tokens or tokens[0].type != 'COMMA':
504 break
505 tokens.pop(0)
506 if not tokens or tokens[0].type != 'RBRACE':
507 self._parser_state.error('unclosed brace', token=opening_brace)
508 tokens.pop(0)
509 else:
510 filters.append(self._parse_single_filter(tokens))
511 return filters
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800512
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800513 # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
514 # ;
515 def _parse_key_value_pair(self, tokens):
516 if not tokens:
517 self._parser_state.error('missing key')
518 key = tokens.pop(0)
519 if key.type != 'IDENTIFIER':
520 self._parser_state.error('invalid key', token=key)
521 if not tokens:
522 self._parser_state.error('missing equal')
523 if tokens[0].type != 'EQUAL':
524 self._parser_state.error('invalid equal', token=tokens[0])
525 tokens.pop(0)
526 value_list = []
527 while tokens:
528 value = tokens.pop(0)
529 if value.type != 'IDENTIFIER':
530 self._parser_state.error('invalid value', token=value)
531 value_list.append(value.value)
532 if not tokens or tokens[0].type != 'COMMA':
533 break
534 tokens.pop(0)
535 else:
536 self._parser_state.error('empty value')
537 return (key.value, value_list)
538
539 # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
540 # ;
541 def _parse_metadata(self, tokens):
542 if not tokens:
543 self._parser_state.error('missing opening bracket')
544 opening_bracket = tokens.pop(0)
545 if opening_bracket.type != 'LBRACKET':
546 self._parser_state.error(
547 'invalid opening bracket', token=opening_bracket)
548 metadata = {}
549 while tokens:
550 first_token = tokens[0]
551 key, value = self._parse_key_value_pair(tokens)
552 if key in metadata:
553 self._parser_state.error(
554 'duplicate metadata key: "%s"' % key, token=first_token)
555 metadata[key] = value
556 if not tokens or tokens[0].type != 'SEMICOLON':
557 break
558 tokens.pop(0)
559 if not tokens or tokens[0].type != 'RBRACKET':
560 self._parser_state.error('unclosed bracket', token=opening_bracket)
561 tokens.pop(0)
562 return metadata
563
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800564 # syscall-descriptor = syscall-name , [ metadata ]
Luis Hector Chavez524da3b2019-03-05 16:44:08 -0800565 # | syscall-group-name , [ metadata ]
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800566 # ;
567 def _parse_syscall_descriptor(self, tokens):
568 if not tokens:
569 self._parser_state.error('missing syscall descriptor')
570 syscall_descriptor = tokens.pop(0)
Matt Delcof68fc8d2019-11-14 16:47:52 -0800571 # `kill` as a syscall name is a special case since kill is also a valid
572 # action and actions have precendence over identifiers.
573 if (syscall_descriptor.type != 'IDENTIFIER' and
574 syscall_descriptor.value != 'kill'):
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800575 self._parser_state.error(
576 'invalid syscall descriptor', token=syscall_descriptor)
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800577 if tokens and tokens[0].type == 'LBRACKET':
578 metadata = self._parse_metadata(tokens)
579 if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
580 return ()
Luis Hector Chavez524da3b2019-03-05 16:44:08 -0800581 if '@' in syscall_descriptor.value:
582 # This is a syscall group.
583 subtokens = syscall_descriptor.value.split('@')
584 if len(subtokens) != 2:
585 self._parser_state.error(
586 'invalid syscall group name', token=syscall_descriptor)
587 syscall_group_name, syscall_namespace_name = subtokens
588 if syscall_namespace_name not in self._arch.syscall_groups:
589 self._parser_state.error(
590 'nonexistent syscall group namespace',
591 token=syscall_descriptor)
592 syscall_namespace = self._arch.syscall_groups[
593 syscall_namespace_name]
594 if syscall_group_name not in syscall_namespace:
595 self._parser_state.error(
596 'nonexistent syscall group', token=syscall_descriptor)
597 return (Syscall(name, self._arch.syscalls[name])
598 for name in syscall_namespace[syscall_group_name])
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800599 if syscall_descriptor.value not in self._arch.syscalls:
600 self._parser_state.error(
601 'nonexistent syscall', token=syscall_descriptor)
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800602 return (Syscall(syscall_descriptor.value,
603 self._arch.syscalls[syscall_descriptor.value]), )
604
605 # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
606 # ':' , filter
607 # | syscall-descriptor , ':' , filter
608 # ;
609 def parse_filter_statement(self, tokens):
610 """Parse a filter statement and return a ParsedFilterStatement."""
611 if not tokens:
612 self._parser_state.error('empty filter statement')
613 syscall_descriptors = []
614 if tokens[0].type == 'LBRACE':
615 opening_brace = tokens.pop(0)
616 while tokens:
617 syscall_descriptors.extend(
618 self._parse_syscall_descriptor(tokens))
619 if not tokens or tokens[0].type != 'COMMA':
620 break
621 tokens.pop(0)
622 if not tokens or tokens[0].type != 'RBRACE':
623 self._parser_state.error('unclosed brace', token=opening_brace)
624 tokens.pop(0)
625 else:
626 syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
627 if not tokens:
628 self._parser_state.error('missing colon')
629 if tokens[0].type != 'COLON':
630 self._parser_state.error('invalid colon', token=tokens[0])
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800631 # Given that there can be multiple syscalls and filters in a single
632 # filter statement, use the colon token as the anchor for error location
633 # purposes.
634 colon_token = tokens.pop(0)
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800635 parsed_filter = self.parse_filter(tokens)
636 if not syscall_descriptors:
637 return None
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800638 return ParsedFilterStatement(
639 tuple(syscall_descriptors), parsed_filter, colon_token)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800640
641 # include-statement = '@include' , posix-path
642 # ;
643 def _parse_include_statement(self, tokens):
644 if not tokens:
645 self._parser_state.error('empty filter statement')
646 if tokens[0].type != 'INCLUDE':
647 self._parser_state.error('invalid include', token=tokens[0])
648 tokens.pop(0)
649 if not tokens:
650 self._parser_state.error('empty include path')
651 include_path = tokens.pop(0)
652 if include_path.type != 'PATH':
653 self._parser_state.error(
654 'invalid include path', token=include_path)
655 if len(self._parser_states) == self._include_depth_limit:
656 self._parser_state.error('@include statement nested too deep')
657 include_filename = os.path.normpath(
658 os.path.join(
659 os.path.dirname(self._parser_state.filename),
660 include_path.value))
661 if not os.path.isfile(include_filename):
662 self._parser_state.error(
663 'Could not @include %s' % include_filename, token=include_path)
664 return self._parse_policy_file(include_filename)
665
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800666 def _parse_frequency_file(self, filename):
667 self._parser_states.append(ParserState(filename))
668 try:
669 frequency_mapping = collections.defaultdict(int)
670 with open(filename) as frequency_file:
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700671 for tokens in self._parser_state.tokenize(frequency_file):
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800672 syscall_numbers = self._parse_syscall_descriptor(tokens)
673 if not tokens:
674 self._parser_state.error('missing colon')
675 if tokens[0].type != 'COLON':
676 self._parser_state.error(
677 'invalid colon', token=tokens[0])
678 tokens.pop(0)
679
680 if not tokens:
681 self._parser_state.error('missing number')
682 number = tokens.pop(0)
683 if number.type != 'NUMERIC_CONSTANT':
684 self._parser_state.error(
685 'invalid number', token=number)
686 number_value = int(number.value, base=0)
687 if number_value < 0:
688 self._parser_state.error(
689 'invalid number', token=number)
690
691 for syscall_number in syscall_numbers:
692 frequency_mapping[syscall_number] += number_value
693 return frequency_mapping
694 finally:
695 self._parser_states.pop()
696
697 # frequency-statement = '@frequency' , posix-path
698 # ;
699 def _parse_frequency_statement(self, tokens):
700 if not tokens:
701 self._parser_state.error('empty frequency statement')
702 if tokens[0].type != 'FREQUENCY':
703 self._parser_state.error('invalid frequency', token=tokens[0])
704 tokens.pop(0)
705 if not tokens:
706 self._parser_state.error('empty frequency path')
707 frequency_path = tokens.pop(0)
708 if frequency_path.type != 'PATH':
709 self._parser_state.error(
710 'invalid frequency path', token=frequency_path)
711 frequency_filename = os.path.normpath(
712 os.path.join(
713 os.path.dirname(self._parser_state.filename),
714 frequency_path.value))
715 if not os.path.isfile(frequency_filename):
716 self._parser_state.error(
717 'Could not open frequency file %s' % frequency_filename,
718 token=frequency_path)
719 return self._parse_frequency_file(frequency_filename)
720
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800721 # default-statement = '@default' , default-action
722 # ;
723 def _parse_default_statement(self, tokens):
724 if not tokens:
725 self._parser_state.error('empty default statement')
726 if tokens[0].type != 'DEFAULT':
727 self._parser_state.error('invalid default', token=tokens[0])
728 tokens.pop(0)
729 if not tokens:
730 self._parser_state.error('empty action')
731 return self._parse_default_action(tokens)
732
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800733 def _parse_policy_file(self, filename):
734 self._parser_states.append(ParserState(filename))
735 try:
736 statements = []
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000737 denylist_header = False
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800738 with open(filename) as policy_file:
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700739 for tokens in self._parser_state.tokenize(policy_file):
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800740 if tokens[0].type == 'INCLUDE':
741 statements.extend(
742 self._parse_include_statement(tokens))
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800743 elif tokens[0].type == 'FREQUENCY':
744 for syscall_number, frequency in self._parse_frequency_statement(
745 tokens).items():
746 self._frequency_mapping[
747 syscall_number] += frequency
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800748 elif tokens[0].type == 'DEFAULT':
749 self._default_action = self._parse_default_statement(
750 tokens)
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000751 elif tokens[0].type == 'DENYLIST':
752 tokens.pop()
753 if not self._denylist:
754 self._parser_state.error('policy is denylist, but '
755 'flag --denylist not '
756 'passed in.')
757 else:
758 denylist_header = True
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800759 else:
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800760 statement = self.parse_filter_statement(tokens)
761 if statement is None:
762 # If all the syscalls in the statement are for
763 # another arch, skip the whole statement.
764 continue
765 statements.append(statement)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800766
767 if tokens:
768 self._parser_state.error(
769 'extra tokens', token=tokens[0])
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +0000770 if self._denylist and not denylist_header:
771 self._parser_state.error('policy must contain @denylist flag to'
772 ' be compiled with --denylist flag.')
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800773 return statements
774 finally:
775 self._parser_states.pop()
776
777 def parse_file(self, filename):
778 """Parse a file and return the list of FilterStatements."""
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800779 self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800780 try:
781 statements = [x for x in self._parse_policy_file(filename)]
782 except RecursionError:
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700783 raise ParseException(
784 'recursion limit exceeded',
785 filename,
786 line=self._parser_states[-1].line)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800787
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800788 # Collapse statements into a single syscall-to-filter-list, remembering
789 # the token for each filter for better diagnostics.
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800790 syscall_filter_mapping = {}
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800791 syscall_filter_definitions = {}
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800792 filter_statements = []
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800793 for syscalls, filters, token in statements:
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800794 for syscall in syscalls:
795 if syscall not in syscall_filter_mapping:
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800796 filter_statements.append(
797 FilterStatement(
798 syscall, self._frequency_mapping.get(syscall, 1),
799 []))
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800800 syscall_filter_mapping[syscall] = filter_statements[-1]
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800801 syscall_filter_definitions[syscall] = []
802 for filt in filters:
803 syscall_filter_mapping[syscall].filters.append(filt)
804 syscall_filter_definitions[syscall].append(token)
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800805 default_action = self._override_default_action or self._default_action
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800806 for filter_statement in filter_statements:
807 unconditional_actions_suffix = list(
808 itertools.dropwhile(lambda filt: filt.expression is not None,
809 filter_statement.filters))
810 if len(unconditional_actions_suffix) == 1:
811 # The last filter already has an unconditional action, no need
812 # to add another one.
813 continue
814 if len(unconditional_actions_suffix) > 1:
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800815 previous_definition_token = syscall_filter_definitions[
816 filter_statement.syscall][
817 -len(unconditional_actions_suffix)]
818 current_definition_token = syscall_filter_definitions[
819 filter_statement.syscall][
820 -len(unconditional_actions_suffix) + 1]
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700821 raise ParseException(
822 ('Syscall %s (number %d) already had '
823 'an unconditional action applied') %
824 (filter_statement.syscall.name,
825 filter_statement.syscall.number),
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800826 filename=current_definition_token.filename,
827 token=current_definition_token) from ParseException(
828 'Previous definition',
829 filename=previous_definition_token.filename,
830 token=previous_definition_token)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800831 assert not unconditional_actions_suffix
832 filter_statement.filters.append(
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800833 Filter(expression=None, action=default_action))
834 return ParsedPolicy(default_action, filter_statements)