#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Matt Delcoa12687b2020-02-07 17:12:47 -080028try:
29 import bpf
30except ImportError:
31 from minijail import bpf
32
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080033
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -070034Token = collections.namedtuple(
Luis Hector Chavez4228eff2019-12-11 19:07:13 -080035 'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080036
37# A regex that can tokenize a Minijail policy file line.
38_TOKEN_SPECIFICATION = (
39 ('COMMENT', r'#.*$'),
40 ('WHITESPACE', r'\s+'),
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -070041 ('CONTINUATION', r'\\$'),
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080042 ('DEFAULT', r'@default\b'),
43 ('INCLUDE', r'@include\b'),
44 ('FREQUENCY', r'@frequency\b'),
Nicole Anderson-Aucdc8af32021-07-30 15:24:04 +000045 ('DENYLIST', r'@denylist$'),
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080046 ('PATH', r'(?:\.)?/\S+'),
47 ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
48 ('COLON', r':'),
49 ('SEMICOLON', r';'),
50 ('COMMA', r','),
51 ('BITWISE_COMPLEMENT', r'~'),
52 ('LPAREN', r'\('),
53 ('RPAREN', r'\)'),
54 ('LBRACE', r'\{'),
55 ('RBRACE', r'\}'),
56 ('RBRACKET', r'\]'),
57 ('LBRACKET', r'\['),
58 ('OR', r'\|\|'),
59 ('AND', r'&&'),
60 ('BITWISE_OR', r'\|'),
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080061 ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080062 ('EQUAL', r'='),
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080063 ('ARGUMENT', r'\barg[0-9]+\b'),
64 ('RETURN', r'\breturn\b'),
65 ('ACTION',
66 r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
Luis Héctor Chávez59a64492021-01-03 05:46:47 -080067 r'\btrace\b|\blog\b|\buser-notify\b'
Luis Hector Chavez080ceef2019-11-15 02:56:45 -080068 ),
Luis Hector Chavez524da3b2019-03-05 16:44:08 -080069 ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080070)
71_TOKEN_RE = re.compile('|'.join(
72 r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
73
74
class ParseException(Exception):
    """An exception that is raised when parsing fails.

    The exception text has three lines: a `filename(line:column): message`
    header, the offending source line, and a row of carets underneath the
    offending span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        """Build the diagnostic text.

        Args:
            message: Description of the problem.
            filename: Name of the file in which the problem was found.
            line: Text of the offending line; ignored when `token` is given.
            line_number: Number of the offending line; ignored when `token`
                is given.
            token: Optional offending token. Its own line, line number,
                column and value length take precedence.
        """
        if not token:
            # Without a token, place a single caret just past the end of the
            # line.
            caret_offset, caret_count = len(line), 1
        else:
            line, line_number = token.line, token.line_number
            caret_offset, caret_count = token.column, len(token.value)

        # Columns are reported to the user starting from one.
        parts = [
            ('%s(%d:%d): %s') % (filename, line_number, caret_offset + 1,
                                 message),
            '\n %s' % line,
            '\n %s%s' % (' ' * caret_offset, '^' * caret_count),
        ]
        super().__init__(''.join(parts))
100
101
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    One instance tracks a single file: its name plus the text and number of
    the line most recently handed to the tokenizer.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception points at `token` when one is given, and at the end of
        the current line otherwise.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield a list of tokens for every logical line in `lines`.

        A physical line ending in a backslash is continued on the next
        physical line, and the tokens of all joined lines are yielded
        together. Whitespace, comment and continuation tokens are omitted, and
        logical lines with no remaining tokens are skipped.

        Args:
            lines: An iterable of strings, one per physical line.

        Yields:
            A fresh list of Token objects for each non-empty logical line.

        Raises:
            ParseException: If part of a line does not match any token.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    # The text between two consecutive matches was not
                    # recognized by any pattern.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Yield a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
                tokens.clear()

        if tokens:
            # The input ended on a continuation line. Flush what was gathered
            # instead of silently discarding the statement.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800177
178
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))
"""One boolean comparison of a syscall argument inside a filter expression."""

Filter = collections.namedtuple('Filter', ('expression', 'action'))
"""A parsed DNF filter expression paired with the action it triggers.

The expression is in Disjunctive Normal Form: an outer list of disjunctions,
each of which is an inner list of conjunctions whose elements are Atoms.
"""

Syscall = collections.namedtuple('Syscall', ('name', 'number'))
"""A system call, identified by name and number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters', 'token'))
"""The outcome of parsing one filter statement.

A statement carries the syscalls it applies to and the filters that are
evaluated in order whenever one of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""The list of filters that applies to one particular syscall.

The filters are evaluated in order, and the final one is always an
unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""The outcome of parsing a minijail .policy file."""
211"""The result of parsing a minijail .policy file."""
212
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800213
# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    Most parsing methods receive a mutable list of tokens and consume, from
    the front, the tokens that belong to the construct they parse. Errors are
    reported through the ParserState of the file currently being parsed.
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None,
                 denylist=False):
        """Create a parser.

        Args:
            arch: Architecture description. The parser reads its `constants`,
                `syscalls`, `syscall_groups`, `arch_name`, `max_unsigned` and
                `min_signed` attributes and calls its `truncate_word` method.
            kill_action: The action used for the `kill` keyword, and the
                default action when `denylist` is false.
            include_depth_limit: Maximum length of the stack of parser states
                before another `@include` is rejected.
            override_default_action: When truthy, replaces the default action
                in the result of `parse_file`.
            denylist: Whether policies are denylists: the default action
                becomes allow, every action must be a `return`, and every
                parsed file must contain the `@denylist` header.
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        if denylist:
            self._default_action = bpf.Allow()
        else:
            self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch
        self._denylist = denylist

    @property
    def _parser_state(self):
        """Return the ParserState of the file currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the unsigned integer value denoted by a single token.

        Identifiers are looked up in the architecture's constants; numeric
        constants are parsed with int(..., base=0). Negative values within
        range are converted with the architecture's `truncate_word`.
        """
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one optionally complemented constant or parenthesized value.

        `tokens` must not be empty.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached only when the tokens ran out where a constant was
            # expected (no tokens at all, or a trailing '|').
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse `argN op value` and return the corresponding Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # Strip the leading 'arg' to obtain the argument index.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse atoms joined by '&&' and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse a argument expression in Disjunctive Normal Form.

        Returns a list of clauses (the disjunction), each of which is a list
        of Atoms (the conjunction).
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                | 'user-notify'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of a `@default` statement.

        Only the actions listed in the grammar above are accepted; any other
        action token is rejected as a permissive default action.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        if action_token.value == 'user-notify':
            return bpf.UserNotify()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object.

        In denylist mode, only `return <constant>` is accepted.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        # denylist policies must specify a return for every line.
        if self._denylist and action_token.type != 'RETURN':
            # Pass the token so that the diagnostic points at the offending
            # action rather than at the end of the line.
            self._parser_state.error(
                'invalid denylist policy', token=action_token)

        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            # The only numeric action is `1`, a synonym for `allow`.
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                return bpf.Allow()
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               | '!','(', argument-expression, [ ';', action ], ')'
    #               ;
    def _parse_single_filter(self, tokens):
        """Parse one filter and return a Filter.

        An argument expression without an explicit action allows the syscall.
        A bare action yields a Filter whose expression is None.
        """
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            # Only argument expressions can start with an ARGUMENT token.
            argument_expression = self.parse_argument_expression(tokens)
            if tokens and tokens[0].type == 'SEMICOLON':
                tokens.pop(0)
                action = self.parse_action(tokens)
            else:
                action = bpf.Allow()
            return Filter(argument_expression, action)
        else:
            return Filter(None, self.parse_action(tokens))

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=value[,value...]` and return (key, list of values)."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a dict of key to value list.

        Duplicate keys are rejected.
        """
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall or syscall group name, with optional metadata.

        Returns:
            A tuple of Syscall objects: one for a plain syscall, one per
            member for a `group@namespace` name, and none when the metadata
            restricts the descriptor to architectures other than the current
            one.
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
                syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # Build the tuple eagerly so that every branch returns the same
            # type and lookups happen here rather than at the caller.
            return tuple(
                Syscall(name, self._arch.syscalls[name])
                for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when no syscall in the statement applies to the current
        architecture. The filter is still parsed in that case.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an `@include` statement and the policy file it names.

        The path is resolved relative to the directory of the file being
        parsed. Returns the statements parsed from the included file.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) == self._include_depth_limit:
            self._parser_state.error('@include statement nested too deep')
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file into a mapping of Syscall to frequency.

        Every line has the form `syscall-descriptor: number`. Frequencies of
        repeated syscalls are added up.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename) as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            # Always restore the parser state of the enclosing file.
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                      ;
    def _parse_frequency_statement(self, tokens):
        """Parse a `@frequency` statement and the frequency file it names.

        The path is resolved relative to the directory of the file being
        parsed. Returns the mapping parsed from the frequency file.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse a `@default` statement and return its action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file, following `@include` statements.

        Frequency and default statements update the parser's own state; the
        return value is the list of ParsedFilterStatement objects found in
        the file and in the files it includes.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            denylist_header = False
            with open(filename) as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        for syscall_number, frequency in self._parse_frequency_statement(
                                tokens).items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    elif tokens[0].type == 'DENYLIST':
                        # Consume the header from the front, like every other
                        # statement, so leftovers are reported below.
                        tokens.pop(0)
                        if not self._denylist:
                            self._parser_state.error('policy is denylist, but '
                                                     'flag --denylist not '
                                                     'passed in.')
                        else:
                            denylist_header = True
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            if self._denylist and not denylist_header:
                self._parser_state.error('policy must contain @denylist flag to'
                                         ' be compiled with --denylist flag.')
            return statements
        finally:
            # Always restore the parser state of the enclosing file.
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a policy file and return a ParsedPolicy.

        All statements are collapsed into one FilterStatement per syscall.
        Every filter list is guaranteed to end in exactly one unconditional
        action: the default action is appended when none is present, and a
        ParseException is raised when more than one is present.
        """
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError as err:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line) from err

        # Collapse statements into a single syscall-to-filter-list, remembering
        # the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                previous_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix)]
                current_definition_token = syscall_filter_definitions[
                    filter_statement.syscall][
                        -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)