#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A parser for the Minijail policy file."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import itertools
import os.path
import re

# `bpf` may be importable either as a top-level module or from the `minijail`
# package; try the top-level name first and fall back to the package.
try:
    import bpf
except ImportError:
    from minijail import bpf


# A single lexical token, together with enough location information
# (file name, full source line, 1-based line number, 0-based column) to
# produce a precise diagnostic.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# The entries are joined into one alternation of named groups, and `re` tries
# alternatives left to right, so ORDER MATTERS: multi-character operators
# ('||', '&&', '==', '!=', ...) come before their single-character prefixes
# ('|', '=', '!'), 'kill-process'/'kill-thread' come before 'kill', and the
# keyword-like groups (ARGUMENT, RETURN, ACTION) come before the catch-all
# IDENTIFIER.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('NOT', r'!'),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b|\buser-notify\b'
     ),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# Each (name, pattern) pair becomes a named group so that `match.lastgroup`
# reports the token type.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))


class ParseException(Exception):
    """An exception that is raised when parsing fails.

    The exception message has the form ``filename(line:column): message``,
    followed by the offending source line and, below it, a row of ``^``
    characters marking the offending span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        """Build the diagnostic message.

        Args:
            message: Human-readable description of the problem.
            filename: Name of the file being parsed.
            line: Source line text; ignored when `token` is given.
            line_number: 1-based line number; ignored when `token` is given.
            token: Optional token-like object (with `line`, `line_number`,
                `column` and `value` attributes) that locates the error.
        """
        if token is not None:
            # The token knows exactly where it came from; prefer that over
            # the caller-supplied line information.
            line = token.line
            line_number = token.line_number
            column = token.column
            length = len(token.value)
        else:
            # Without a token, point at the position just past the end of
            # the line.
            column = len(line)
            length = 1

        # Columns are stored 0-based but reported 1-based.
        message = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                       message)
        message += '\n %s' % line
        message += '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(message)


class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    One instance tracks a single input file: its name plus the text and
    1-based number of the line most recently handed to `tokenize`.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        When `token` is given the exception points at that token; otherwise
        it points at the end of the current line.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def _invalid_token_error(self, start, end):
        """Raise an 'invalid token' error for the current line's [start:end)."""
        self.error(
            'invalid token',
            token=Token('INVALID', self._line[start:end], self.filename,
                        self._line, self._line_number, start))

    def tokenize(self, lines):
        """Yield one list of Tokens per logical line of `lines`.

        A physical line ending in a backslash is joined with the following
        line(s) into a single logical line. Whitespace, comments and the
        continuation marker itself are not included in the output, and
        logical lines with no remaining tokens are skipped.

        Args:
            lines: An iterable of strings (e.g. an open text file).

        Raises:
            ParseException: if any part of a line matches no known token.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                # finditer() silently skips unmatched text, so a gap between
                # consecutive matches means there was unrecognized input.
                if token.start() != last_end:
                    self._invalid_token_error(last_end, token.start())
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                # Unrecognized text after the last match.
                self._invalid_token_error(last_end, len(self._line))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Hand the pending tokens
            # to the caller instead of silently discarding a statement.
            yield tokens


Atom = collections.namedtuple('Atom', ['argument_index', 'op', 'value'])
"""A single boolean comparison within a filter expression."""

Filter = collections.namedtuple('Filter', ['expression', 'action'])
"""The result of parsing a DNF filter expression, with its action.

Since the expression is in Disjunctive Normal Form, it is composed of two levels
of lists, one for disjunctions and the inner one for conjunctions. The elements
of the inner list are Atoms.
"""

Syscall = collections.namedtuple('Syscall', ['name', 'number'])
"""A system call."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ['syscalls', 'filters', 'token'])
"""The result of parsing a filter statement.

Statements have a list of syscalls, and an associated list of filters that will
be evaluated sequentially when any of the syscalls is invoked.
"""

FilterStatement = collections.namedtuple('FilterStatement',
                                         ['syscall', 'frequency', 'filters'])
"""The filter list for a particular syscall.

This is a mapping from one syscall to a list of filters that are evaluated
sequentially. The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple('ParsedPolicy',
                                      ['default_action', 'filter_statements'])
"""The result of parsing a minijail .policy file."""


# pylint: disable=too-few-public-methods
class PolicyParser:
    """A parser for the Minijail seccomp policy file format.

    The parser is a hand-written recursive-descent parser. Every `_parse_*` /
    `parse_*` method receives a mutable list of Tokens, consumes the tokens it
    recognizes from the front of that list, and reports problems through the
    current ParserState (which raises ParseException).
    """

    def __init__(self,
                 arch,
                 *,
                 kill_action,
                 include_depth_limit=10,
                 override_default_action=None,
                 denylist=False):
        """Create a parser.

        Args:
            arch: Architecture description. The parser reads its `constants`,
                `syscalls`, `syscall_groups`, `arch_name`, `max_unsigned`,
                `min_signed` attributes and calls `truncate_word()`.
            kill_action: Action object used for the `kill` action (and as the
                default action in allowlist mode).
            include_depth_limit: Maximum size of the parser-state stack before
                `@include` is refused.
            override_default_action: If truthy, used by `parse_file` instead
                of the default action derived from the policy.
            denylist: Whether the policy is a denylist; the default action is
                then Allow instead of `kill_action`.
        """
        self._parser_states = [ParserState("<memory>")]
        self._kill_action = kill_action
        self._include_depth_limit = include_depth_limit
        if denylist:
            self._default_action = bpf.Allow()
        else:
            self._default_action = self._kill_action
        self._override_default_action = override_default_action
        self._frequency_mapping = collections.defaultdict(int)
        self._arch = arch
        self._denylist = denylist

    @property
    def _parser_state(self):
        """Return the ParserState of the file currently being parsed."""
        return self._parser_states[-1]

    # single-constant = identifier
    #                 | numeric-constant
    #                 ;
    def _parse_single_constant(self, token):
        """Return the unsigned integer value of a single constant token."""
        if token.type == 'IDENTIFIER':
            if token.value not in self._arch.constants:
                self._parser_state.error('invalid constant', token=token)
            single_constant = self._arch.constants[token.value]
        elif token.type == 'NUMERIC_CONSTANT':
            try:
                single_constant = int(token.value, base=0)
            except ValueError:
                self._parser_state.error('invalid constant', token=token)
        else:
            self._parser_state.error('invalid constant', token=token)
        if single_constant > self._arch.max_unsigned:
            self._parser_state.error('unsigned overflow', token=token)
        elif single_constant < self._arch.min_signed:
            self._parser_state.error('signed underflow', token=token)
        elif single_constant < 0:
            # This converts the constant to an unsigned representation of the
            # same value, since BPF only uses unsigned values.
            single_constant = self._arch.truncate_word(single_constant)
        return single_constant

    # constant = [ '~' ] , '(' , value , ')'
    #          | [ '~' ] , single-constant
    #          ;
    def _parse_constant(self, tokens):
        """Parse one (optionally complemented / parenthesized) constant.

        Callers must pass a non-empty token list.
        """
        negate = False
        if tokens[0].type == 'BITWISE_COMPLEMENT':
            negate = True
            tokens.pop(0)
            if not tokens:
                self._parser_state.error('empty complement')
            if tokens[0].type == 'BITWISE_COMPLEMENT':
                self._parser_state.error(
                    'invalid double complement', token=tokens[0])
        if tokens[0].type == 'LPAREN':
            last_open_paren = tokens.pop(0)
            single_value = self.parse_value(tokens)
            if not tokens or tokens[0].type != 'RPAREN':
                self._parser_state.error(
                    'unclosed parenthesis', token=last_open_paren)
        else:
            single_value = self._parse_single_constant(tokens[0])
        # Consume either the closing parenthesis or the single constant.
        tokens.pop(0)
        if negate:
            single_value = self._arch.truncate_word(~single_value)
        return single_value

    # value = constant , [ { '|' , constant } ]
    #       ;
    def parse_value(self, tokens):
        """Parse constants separated bitwise OR operator |.

        Constants can be:

        - A number that can be parsed with int(..., base=0)
        - A named constant expression.
        - A parenthesized, valid constant expression.
        - A valid constant expression prefixed with the unary bitwise
          complement operator ~.
        - A series of valid constant expressions separated by bitwise
          OR operator |.

        If there is an error parsing any of the constants, the whole process
        fails.
        """

        value = 0
        while tokens:
            value |= self._parse_constant(tokens)
            if not tokens or tokens[0].type != 'BITWISE_OR':
                break
            tokens.pop(0)
        else:
            # Reached only when the token list ran out where a constant was
            # expected (at the start, or right after a '|').
            self._parser_state.error('empty constant')
        return value

    # atom = argument , op , value
    #      ;
    def _parse_atom(self, tokens):
        """Parse a single `argN <op> value` comparison into an Atom."""
        if not tokens:
            self._parser_state.error('missing argument')
        argument = tokens.pop(0)
        if argument.type != 'ARGUMENT':
            self._parser_state.error('invalid argument', token=argument)

        if not tokens:
            self._parser_state.error('missing operator')
        operator = tokens.pop(0)
        if operator.type != 'OP':
            self._parser_state.error('invalid operator', token=operator)

        value = self.parse_value(tokens)
        # ARGUMENT tokens look like 'arg<digits>'; strip the 'arg' prefix.
        argument_index = int(argument.value[3:])
        if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
            self._parser_state.error('invalid argument', token=argument)
        return Atom(argument_index, operator.value, value)

    # clause = atom , [ { '&&' , atom } ]
    #        ;
    def _parse_clause(self, tokens):
        """Parse a conjunction of atoms and return them as a list."""
        atoms = []
        while tokens:
            atoms.append(self._parse_atom(tokens))
            if not tokens or tokens[0].type != 'AND':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty clause')
        return atoms

    # argument-expression = clause , [ { '||' , clause } ]
    #                     ;
    def parse_argument_expression(self, tokens):
        """Parse a argument expression in Disjunctive Normal Form.

        Returns a list of clauses (the disjunction); each clause is a list of
        Atoms (the conjunction).
        """

        clauses = []
        while tokens:
            clauses.append(self._parse_clause(tokens))
            if not tokens or tokens[0].type != 'OR':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty argument expression')
        return clauses

    # default-action = 'kill-process'
    #                | 'kill-thread'
    #                | 'kill'
    #                | 'trap'
    #                | 'user-notify'
    #                ;
    def _parse_default_action(self, tokens):
        """Parse the action of a `@default` statement.

        Only non-permissive actions are accepted; any other ACTION token
        (e.g. 'allow', 'trace', 'log') is rejected.
        """
        if not tokens:
            self._parser_state.error('missing default action')
        action_token = tokens.pop(0)
        if action_token.type != 'ACTION':
            return self._parser_state.error(
                'invalid default action', token=action_token)
        if action_token.value == 'kill-process':
            return bpf.KillProcess()
        if action_token.value == 'kill-thread':
            return bpf.KillThread()
        if action_token.value == 'kill':
            return self._kill_action
        if action_token.value == 'trap':
            return bpf.Trap()
        if action_token.value == 'user-notify':
            return bpf.UserNotify()
        return self._parser_state.error(
            'invalid permissive default action', token=action_token)

    # action = 'allow' | '1'
    #        | 'kill-process'
    #        | 'kill-thread'
    #        | 'kill' | '0'
    #        | 'trap'
    #        | 'trace'
    #        | 'log'
    #        | 'user-notify'
    #        | 'return' , single-constant
    #        ;
    def parse_action(self, tokens):
        """Parse an action and return the corresponding bpf action object.

        In denylist mode only the numeric action 0 is accepted; in allowlist
        mode the numeric action 0 is rejected.
        """
        if not tokens:
            self._parser_state.error('missing action')
        action_token = tokens.pop(0)
        # The only valid denylist action is 0
        if self._denylist:
            if action_token.type != 'NUMERIC_CONSTANT':
                self._parser_state.error(
                    'invalid denylist policy', token=action_token)

        if action_token.type == 'ACTION':
            if action_token.value == 'allow':
                return bpf.Allow()
            if action_token.value == 'kill':
                return self._kill_action
            if action_token.value == 'kill-process':
                return bpf.KillProcess()
            if action_token.value == 'kill-thread':
                return bpf.KillThread()
            if action_token.value == 'trap':
                return bpf.Trap()
            if action_token.value == 'trace':
                return bpf.Trace()
            if action_token.value == 'user-notify':
                return bpf.UserNotify()
            if action_token.value == 'log':
                return bpf.Log()
        elif action_token.type == 'NUMERIC_CONSTANT':
            constant = self._parse_single_constant(action_token)
            if constant == 1:
                if self._denylist:
                    self._parser_state.error(
                        'invalid denylist policy', token=action_token)
                return bpf.Allow()
            if constant == 0:
                if not self._denylist:
                    self._parser_state.error(
                        'invalid allowlist policy', token=action_token)
                return self._kill_action
        elif action_token.type == 'RETURN':
            if not tokens:
                self._parser_state.error('missing return value')
            return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
        return self._parser_state.error('invalid action', token=action_token)

    # single-filter = action
    #               | argument-expression , [ ';' , action ]
    #               | '!','(', argument-expression, [ ';', action ], ')'
    #               ;
    # NOTE(review): the grammar above shows parentheses after '!', but the
    # implementation below consumes only the '!' and then expects an argument
    # expression directly -- confirm which one is intended.
    def _parse_single_filter(self, tokens):
        """Parse one filter and return it as a Filter."""
        if not tokens:
            self._parser_state.error('missing filter')
        if tokens[0].type == 'ARGUMENT':
            if self._denylist:
                self._parser_state.error(
                    'invalid denylist policy', token=tokens[0])
            # Allowlist expressions allow the syscall unless overridden below.
            action = bpf.Allow()
        elif tokens[0].type == 'NOT':
            if not self._denylist:
                self._parser_state.error(
                    'invalid allowlist policy', token=tokens[0])
            tokens.pop(0)
            # Denylist expressions kill unless overridden below.
            action = self._kill_action
        else:
            return Filter(None, self.parse_action(tokens))
        # Parse an argument expression, either allowlist or denylist.
        # Only argument expressions can start with an ARGUMENT token.
        argument_expression = self.parse_argument_expression(tokens)
        if tokens and tokens[0].type == 'SEMICOLON':
            tokens.pop(0)
            action = self.parse_action(tokens)
        return Filter(argument_expression, action)

    # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
    #        | single-filter
    #        ;
    def parse_filter(self, tokens):
        """Parse a filter and return a list of Filter objects."""
        if not tokens:
            self._parser_state.error('missing filter')
        filters = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                filters.append(self._parse_single_filter(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            filters.append(self._parse_single_filter(tokens))
        return filters

    # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
    #                ;
    def _parse_key_value_pair(self, tokens):
        """Parse `key=v1,v2,...` and return `(key, [v1, v2, ...])`."""
        if not tokens:
            self._parser_state.error('missing key')
        key = tokens.pop(0)
        if key.type != 'IDENTIFIER':
            self._parser_state.error('invalid key', token=key)
        if not tokens:
            self._parser_state.error('missing equal')
        if tokens[0].type != 'EQUAL':
            self._parser_state.error('invalid equal', token=tokens[0])
        tokens.pop(0)
        value_list = []
        while tokens:
            value = tokens.pop(0)
            if value.type != 'IDENTIFIER':
                self._parser_state.error('invalid value', token=value)
            value_list.append(value.value)
            if not tokens or tokens[0].type != 'COMMA':
                break
            tokens.pop(0)
        else:
            self._parser_state.error('empty value')
        return (key.value, value_list)

    # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
    #          ;
    def _parse_metadata(self, tokens):
        """Parse a bracketed metadata block into a dict of key -> value list."""
        if not tokens:
            self._parser_state.error('missing opening bracket')
        opening_bracket = tokens.pop(0)
        if opening_bracket.type != 'LBRACKET':
            self._parser_state.error(
                'invalid opening bracket', token=opening_bracket)
        metadata = {}
        while tokens:
            # Remember where the pair starts so that a duplicate key can be
            # reported at the right location.
            first_token = tokens[0]
            key, value = self._parse_key_value_pair(tokens)
            if key in metadata:
                self._parser_state.error(
                    'duplicate metadata key: "%s"' % key, token=first_token)
            metadata[key] = value
            if not tokens or tokens[0].type != 'SEMICOLON':
                break
            tokens.pop(0)
        if not tokens or tokens[0].type != 'RBRACKET':
            self._parser_state.error('unclosed bracket', token=opening_bracket)
        tokens.pop(0)
        return metadata

    # syscall-descriptor = syscall-name , [ metadata ]
    #                    | syscall-group-name , [ metadata ]
    #                    ;
    def _parse_syscall_descriptor(self, tokens):
        """Parse a syscall descriptor and return a tuple of Syscalls.

        The tuple is empty when the descriptor carries `arch` metadata that
        does not include the current architecture, has one element for a
        plain syscall name, and one element per member for a syscall group
        (`group@namespace`).
        """
        if not tokens:
            self._parser_state.error('missing syscall descriptor')
        syscall_descriptor = tokens.pop(0)
        # `kill` as a syscall name is a special case since kill is also a valid
        # action and actions have precedence over identifiers.
        if (syscall_descriptor.type != 'IDENTIFIER' and
                syscall_descriptor.value != 'kill'):
            self._parser_state.error(
                'invalid syscall descriptor', token=syscall_descriptor)
        if tokens and tokens[0].type == 'LBRACKET':
            metadata = self._parse_metadata(tokens)
            if ('arch' in metadata and
                    self._arch.arch_name not in metadata['arch']):
                return ()
        if '@' in syscall_descriptor.value:
            # This is a syscall group.
            subtokens = syscall_descriptor.value.split('@')
            if len(subtokens) != 2:
                self._parser_state.error(
                    'invalid syscall group name', token=syscall_descriptor)
            syscall_group_name, syscall_namespace_name = subtokens
            if syscall_namespace_name not in self._arch.syscall_groups:
                self._parser_state.error(
                    'nonexistent syscall group namespace',
                    token=syscall_descriptor)
            syscall_namespace = self._arch.syscall_groups[
                syscall_namespace_name]
            if syscall_group_name not in syscall_namespace:
                self._parser_state.error(
                    'nonexistent syscall group', token=syscall_descriptor)
            # Materialize eagerly so every code path returns a tuple and the
            # result can safely be iterated more than once.
            return tuple(
                Syscall(name, self._arch.syscalls[name])
                for name in syscall_namespace[syscall_group_name])
        if syscall_descriptor.value not in self._arch.syscalls:
            self._parser_state.error(
                'nonexistent syscall', token=syscall_descriptor)
        return (Syscall(syscall_descriptor.value,
                        self._arch.syscalls[syscall_descriptor.value]), )

    # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
    #                       ':' , filter
    #                  | syscall-descriptor , ':' , filter
    #                  ;
    def parse_filter_statement(self, tokens):
        """Parse a filter statement and return a ParsedFilterStatement.

        Returns None when the statement parsed correctly but none of its
        syscall descriptors apply to the current architecture.
        """
        if not tokens:
            self._parser_state.error('empty filter statement')
        syscall_descriptors = []
        if tokens[0].type == 'LBRACE':
            opening_brace = tokens.pop(0)
            while tokens:
                syscall_descriptors.extend(
                    self._parse_syscall_descriptor(tokens))
                if not tokens or tokens[0].type != 'COMMA':
                    break
                tokens.pop(0)
            if not tokens or tokens[0].type != 'RBRACE':
                self._parser_state.error('unclosed brace', token=opening_brace)
            tokens.pop(0)
        else:
            syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
        if not tokens:
            self._parser_state.error('missing colon')
        if tokens[0].type != 'COLON':
            self._parser_state.error('invalid colon', token=tokens[0])
        # Given that there can be multiple syscalls and filters in a single
        # filter statement, use the colon token as the anchor for error location
        # purposes.
        colon_token = tokens.pop(0)
        # The filter is parsed (and therefore validated) even if the statement
        # ends up being skipped for this architecture.
        parsed_filter = self.parse_filter(tokens)
        if not syscall_descriptors:
            return None
        return ParsedFilterStatement(
            tuple(syscall_descriptors), parsed_filter, colon_token)

    # include-statement = '@include' , posix-path
    #                   ;
    def _parse_include_statement(self, tokens):
        """Parse an `@include` statement and the file it refers to.

        The path is resolved relative to the directory of the file currently
        being parsed. Returns the statements parsed from the included file.
        """
        if not tokens:
            self._parser_state.error('empty include statement')
        if tokens[0].type != 'INCLUDE':
            self._parser_state.error('invalid include', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty include path')
        include_path = tokens.pop(0)
        if include_path.type != 'PATH':
            self._parser_state.error(
                'invalid include path', token=include_path)
        if len(self._parser_states) >= self._include_depth_limit:
            self._parser_state.error(
                '@include statement nested too deep', token=include_path)
        include_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                include_path.value))
        if not os.path.isfile(include_filename):
            self._parser_state.error(
                'Could not @include %s' % include_filename, token=include_path)
        return self._parse_policy_file(include_filename)

    def _parse_frequency_file(self, filename):
        """Parse a frequency file into a mapping of Syscall -> count.

        Each logical line has the form `syscall-descriptor : number`; counts
        for a syscall that appears more than once are added together.
        """
        self._parser_states.append(ParserState(filename))
        try:
            frequency_mapping = collections.defaultdict(int)
            with open(filename, encoding='utf-8') as frequency_file:
                for tokens in self._parser_state.tokenize(frequency_file):
                    syscall_numbers = self._parse_syscall_descriptor(tokens)
                    if not tokens:
                        self._parser_state.error('missing colon')
                    if tokens[0].type != 'COLON':
                        self._parser_state.error(
                            'invalid colon', token=tokens[0])
                    tokens.pop(0)

                    if not tokens:
                        self._parser_state.error('missing number')
                    number = tokens.pop(0)
                    if number.type != 'NUMERIC_CONSTANT':
                        self._parser_state.error(
                            'invalid number', token=number)
                    number_value = int(number.value, base=0)
                    if number_value < 0:
                        self._parser_state.error(
                            'invalid number', token=number)

                    for syscall_number in syscall_numbers:
                        frequency_mapping[syscall_number] += number_value
            return frequency_mapping
        finally:
            # Always restore the previous parser state, even on error.
            self._parser_states.pop()

    # frequency-statement = '@frequency' , posix-path
    #                     ;
    def _parse_frequency_statement(self, tokens):
        """Parse a `@frequency` statement and the file it refers to.

        The path is resolved relative to the directory of the file currently
        being parsed. Returns the mapping produced by the frequency file.
        """
        if not tokens:
            self._parser_state.error('empty frequency statement')
        if tokens[0].type != 'FREQUENCY':
            self._parser_state.error('invalid frequency', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty frequency path')
        frequency_path = tokens.pop(0)
        if frequency_path.type != 'PATH':
            self._parser_state.error(
                'invalid frequency path', token=frequency_path)
        frequency_filename = os.path.normpath(
            os.path.join(
                os.path.dirname(self._parser_state.filename),
                frequency_path.value))
        if not os.path.isfile(frequency_filename):
            self._parser_state.error(
                'Could not open frequency file %s' % frequency_filename,
                token=frequency_path)
        return self._parse_frequency_file(frequency_filename)

    # default-statement = '@default' , default-action
    #                   ;
    def _parse_default_statement(self, tokens):
        """Parse a `@default` statement and return its action."""
        if not tokens:
            self._parser_state.error('empty default statement')
        if tokens[0].type != 'DEFAULT':
            self._parser_state.error('invalid default', token=tokens[0])
        tokens.pop(0)
        if not tokens:
            self._parser_state.error('empty action')
        return self._parse_default_action(tokens)

    def _parse_policy_file(self, filename):
        """Parse one policy file and return its ParsedFilterStatements.

        `@include` statements are expanded in place, `@frequency` statements
        accumulate into the parser's frequency mapping, and `@default`
        statements replace the parser's default action.
        """
        self._parser_states.append(ParserState(filename))
        try:
            statements = []
            with open(filename, encoding='utf-8') as policy_file:
                for tokens in self._parser_state.tokenize(policy_file):
                    if tokens[0].type == 'INCLUDE':
                        statements.extend(
                            self._parse_include_statement(tokens))
                    elif tokens[0].type == 'FREQUENCY':
                        frequencies = self._parse_frequency_statement(tokens)
                        for syscall_number, frequency in frequencies.items():
                            self._frequency_mapping[
                                syscall_number] += frequency
                    elif tokens[0].type == 'DEFAULT':
                        self._default_action = self._parse_default_statement(
                            tokens)
                    else:
                        statement = self.parse_filter_statement(tokens)
                        if statement is None:
                            # If all the syscalls in the statement are for
                            # another arch, skip the whole statement.
                            continue
                        statements.append(statement)

                    if tokens:
                        self._parser_state.error(
                            'extra tokens', token=tokens[0])
            return statements
        finally:
            # Always restore the previous parser state, even on error.
            self._parser_states.pop()

    def parse_file(self, filename):
        """Parse a policy file and return the resulting ParsedPolicy.

        All filter statements (including those from `@include`d files) are
        collapsed into one FilterStatement per syscall, in order of first
        appearance. Every FilterStatement ends with exactly one unconditional
        action; the default action is appended when the policy does not
        provide one.

        Raises:
            ParseException: on any syntax error, or when a syscall has more
                filters after an unconditional action.
        """
        # NOTE(review): the frequency mapping is reset for every call, but a
        # default action set by a previous file's `@default` is not -- confirm
        # whether parser instances are meant to be reused across files.
        self._frequency_mapping = collections.defaultdict(int)
        try:
            statements = list(self._parse_policy_file(filename))
        except RecursionError:
            raise ParseException(
                'recursion limit exceeded',
                filename,
                line=self._parser_states[-1].line)

        # Collapse statements into a single syscall-to-filter-list, remembering
        # the token for each filter for better diagnostics.
        syscall_filter_mapping = {}
        syscall_filter_definitions = {}
        filter_statements = []
        for syscalls, filters, token in statements:
            for syscall in syscalls:
                if syscall not in syscall_filter_mapping:
                    filter_statements.append(
                        FilterStatement(
                            syscall, self._frequency_mapping.get(syscall, 1),
                            []))
                    syscall_filter_mapping[syscall] = filter_statements[-1]
                    syscall_filter_definitions[syscall] = []
                for filt in filters:
                    # Keep both lists index-aligned: one definition token per
                    # filter.
                    syscall_filter_mapping[syscall].filters.append(filt)
                    syscall_filter_definitions[syscall].append(token)
        default_action = self._override_default_action or self._default_action
        for filter_statement in filter_statements:
            # Everything from the first unconditional filter onwards.
            unconditional_actions_suffix = list(
                itertools.dropwhile(lambda filt: filt.expression is not None,
                                    filter_statement.filters))
            if len(unconditional_actions_suffix) == 1:
                # The last filter already has an unconditional action, no need
                # to add another one.
                continue
            if len(unconditional_actions_suffix) > 1:
                # Something follows an unconditional action: report the first
                # filter after it, chained to the unconditional one.
                definitions = syscall_filter_definitions[
                    filter_statement.syscall]
                previous_definition_token = definitions[
                    -len(unconditional_actions_suffix)]
                current_definition_token = definitions[
                    -len(unconditional_actions_suffix) + 1]
                raise ParseException(
                    ('Syscall %s (number %d) already had '
                     'an unconditional action applied') %
                    (filter_statement.syscall.name,
                     filter_statement.syscall.number),
                    filename=current_definition_token.filename,
                    token=current_definition_token) from ParseException(
                        'Previous definition',
                        filename=previous_definition_token.filename,
                        token=previous_definition_token)
            assert not unconditional_actions_suffix
            filter_statement.filters.append(
                Filter(expression=None, action=default_action))
        return ParsedPolicy(default_action, filter_statements)