blob: d58dbd6ecb8045add9796b457b326b405de03f8d [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080028import bpf
29
# A lexical token plus the location information needed for diagnostics: the
# name of the file it came from, the full text of its line, the 1-based line
# number and the 0-based column at which the token starts.
Token = collections.namedtuple(
    'Token', ['type', 'value', 'filename', 'line', 'line_number', 'column'])

# A regex that can tokenize a Minijail policy file line.
#
# Each entry is a (token type, pattern) pair. The patterns are tried in the
# order in which they are listed here, so the order is significant: '||' has to
# be tried before '|', '&&' before the '&' operator, the comparison operators
# before '=', and the keywords (arguments, 'return', actions) before the
# catch-all IDENTIFIER pattern.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('CONTINUATION', r'\\$'),
    ('DEFAULT', r'@default\b'),
    ('INCLUDE', r'@include\b'),
    ('FREQUENCY', r'@frequency\b'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b'
     ),
    # '-' and '@' are allowed inside identifiers (but not as first character).
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9-@]*'),
)
# All the patterns are combined into a single alternation of named groups, so
# that `match.lastgroup` is the type of the token that matched.
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
68
69
class ParseException(Exception):
    """Error raised when a policy file cannot be parsed.

    The exception message is made of three lines: a
    "<filename>(<line number>:<column>): <message>" header, the text of the
    offending line, and a row of '^' characters placed under the offending
    span of that line.

    Args:
        message: Human-readable description of the problem.
        filename: Name of the file in which the problem was found.
        line: Text of the offending line. Ignored when `token` is given.
        line_number: 1-based number of the offending line. Ignored when
            `token` is given.
        token: Optional token that caused the error. When provided, the
            line, line number, column and span length are taken from it.
    """

    # pylint: disable=too-many-arguments
    def __init__(self,
                 message,
                 filename,
                 *,
                 line='',
                 line_number=1,
                 token=None):
        if token:
            # The token knows exactly where it was found.
            line, line_number = token.line, token.line_number
            column, length = token.column, len(token.value)
        else:
            # Without a token, point one character past the end of the line.
            column, length = len(line), 1

        header = ('%s(%d:%d): %s') % (filename, line_number, column + 1,
                                      message)
        source_line = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source_line + marker)
95
96
class ParserState:
    """Stores the state of the Parser to provide better diagnostics.

    It remembers the name of the file being tokenized as well as the text and
    the 1-based number of the physical line that was read last, so that errors
    can point at the place where they were detected.
    """

    def __init__(self, filename):
        self._filename = filename
        self._line = ''
        self._line_number = 0

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the current line being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the current line number being processed."""
        return self._line_number

    def error(self, message, token=None):
        """Raise a ParseException with the provided message.

        The exception points at `token` when one is given, and at the end of
        the current line otherwise. This method never returns.
        """
        raise ParseException(
            message,
            self.filename,
            line=self._line,
            line_number=self._line_number,
            token=token)

    def tokenize(self, lines):
        """Yield one list of tokens for every logical line in `lines`.

        A physical line that ends with a backslash is continued on the next
        physical line; the tokens of all the joined lines are yielded as a
        single list. Whitespace, comments and the continuation marker itself
        are dropped, and lines without any token are skipped.

        Args:
            lines: An iterable of strings, one per physical line (for example
                an open text file).

        Yields:
            Lists of Token. Every list is a fresh copy that the caller may
            modify freely.

        Raises:
            ParseException: If part of a line cannot be tokenized.
        """
        tokens = []

        for line_number, line in enumerate(lines, start=1):
            self._line_number = line_number
            self._line = line.rstrip('\r\n')

            last_end = 0
            for token in _TOKEN_RE.finditer(self._line):
                if token.start() != last_end:
                    # finditer() skipped over text that matches no pattern.
                    self.error(
                        'invalid token',
                        token=Token('INVALID',
                                    self._line[last_end:token.start()],
                                    self.filename, self._line,
                                    self._line_number, last_end))
                last_end = token.end()

                # Omit whitespace and comments now to avoid sprinkling this
                # logic elsewhere.
                if token.lastgroup in ('WHITESPACE', 'COMMENT',
                                       'CONTINUATION'):
                    continue
                tokens.append(
                    Token(token.lastgroup, token.group(), self.filename,
                          self._line, self._line_number, token.start()))
            if last_end != len(self._line):
                # Trailing text that matches no pattern.
                self.error(
                    'invalid token',
                    token=Token('INVALID', self._line[last_end:],
                                self.filename, self._line, self._line_number,
                                last_end))

            if self._line.endswith('\\'):
                # This line is not finished yet.
                continue

            if tokens:
                # Return a copy of the token list so that the caller can be
                # free to modify it.
                yield tokens[::]
            tokens.clear()

        if tokens:
            # The input ended on a continuation line. Hand out what was
            # collected instead of silently discarding it, so that the
            # statement is either used or reported as malformed.
            yield tokens[::]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800172
173
Atom = collections.namedtuple('Atom', ('argument_index', 'op', 'value'))
"""One boolean comparison (argument index, operator, value) of a filter."""

Filter = collections.namedtuple('Filter', ('expression', 'action'))
"""A parsed DNF filter expression together with the action it triggers.

The expression is in Disjunctive Normal Form and therefore stored as two levels
of lists: the outer list holds the disjunctions and each inner list holds the
Atoms of one conjunction.
"""

Syscall = collections.namedtuple('Syscall', ('name', 'number'))
"""A system call, identified by its name and its number."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters', 'token'))
"""What parsing one filter statement produces.

A statement consists of a list of syscalls and the list of filters that are
evaluated sequentially whenever any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""All the filters that apply to one particular syscall.

Maps a single syscall to the list of filters that are evaluated sequentially.
The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""What parsing a minijail .policy file produces."""
207
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800208
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800209# pylint: disable=too-few-public-methods
210class PolicyParser:
211 """A parser for the Minijail seccomp policy file format."""
212
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800213 def __init__(self,
214 arch,
215 *,
216 kill_action,
217 include_depth_limit=10,
218 override_default_action=None):
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800219 self._parser_states = [ParserState("<memory>")]
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800220 self._kill_action = kill_action
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800221 self._include_depth_limit = include_depth_limit
222 self._default_action = self._kill_action
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800223 self._override_default_action = override_default_action
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800224 self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800225 self._arch = arch
226
227 @property
228 def _parser_state(self):
229 return self._parser_states[-1]
230
231 # single-constant = identifier
232 # | numeric-constant
233 # ;
234 def _parse_single_constant(self, token):
235 if token.type == 'IDENTIFIER':
236 if token.value not in self._arch.constants:
237 self._parser_state.error('invalid constant', token=token)
238 single_constant = self._arch.constants[token.value]
239 elif token.type == 'NUMERIC_CONSTANT':
240 try:
241 single_constant = int(token.value, base=0)
242 except ValueError:
243 self._parser_state.error('invalid constant', token=token)
244 else:
245 self._parser_state.error('invalid constant', token=token)
246 if single_constant > self._arch.max_unsigned:
247 self._parser_state.error('unsigned overflow', token=token)
248 elif single_constant < self._arch.min_signed:
249 self._parser_state.error('signed underflow', token=token)
250 elif single_constant < 0:
251 # This converts the constant to an unsigned representation of the
252 # same value, since BPF only uses unsigned values.
253 single_constant = self._arch.truncate_word(single_constant)
254 return single_constant
255
256 # constant = [ '~' ] , '(' , value , ')'
257 # | [ '~' ] , single-constant
258 # ;
259 def _parse_constant(self, tokens):
260 negate = False
261 if tokens[0].type == 'BITWISE_COMPLEMENT':
262 negate = True
263 tokens.pop(0)
264 if not tokens:
265 self._parser_state.error('empty complement')
266 if tokens[0].type == 'BITWISE_COMPLEMENT':
267 self._parser_state.error(
268 'invalid double complement', token=tokens[0])
269 if tokens[0].type == 'LPAREN':
270 last_open_paren = tokens.pop(0)
271 single_value = self.parse_value(tokens)
272 if not tokens or tokens[0].type != 'RPAREN':
273 self._parser_state.error(
274 'unclosed parenthesis', token=last_open_paren)
275 else:
276 single_value = self._parse_single_constant(tokens[0])
277 tokens.pop(0)
278 if negate:
279 single_value = self._arch.truncate_word(~single_value)
280 return single_value
281
282 # value = constant , [ { '|' , constant } ]
283 # ;
284 def parse_value(self, tokens):
285 """Parse constants separated bitwise OR operator |.
286
287 Constants can be:
288
289 - A number that can be parsed with int(..., base=0)
290 - A named constant expression.
291 - A parenthesized, valid constant expression.
292 - A valid constant expression prefixed with the unary bitwise
293 complement operator ~.
294 - A series of valid constant expressions separated by bitwise
295 OR operator |.
296
297 If there is an error parsing any of the constants, the whole process
298 fails.
299 """
300
301 value = 0
302 while tokens:
303 value |= self._parse_constant(tokens)
304 if not tokens or tokens[0].type != 'BITWISE_OR':
305 break
306 tokens.pop(0)
307 else:
308 self._parser_state.error('empty constant')
309 return value
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800310
311 # atom = argument , op , value
312 # ;
313 def _parse_atom(self, tokens):
314 if not tokens:
315 self._parser_state.error('missing argument')
316 argument = tokens.pop(0)
317 if argument.type != 'ARGUMENT':
318 self._parser_state.error('invalid argument', token=argument)
319
320 if not tokens:
321 self._parser_state.error('missing operator')
322 operator = tokens.pop(0)
323 if operator.type != 'OP':
324 self._parser_state.error('invalid operator', token=operator)
325
326 value = self.parse_value(tokens)
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800327 argument_index = int(argument.value[3:])
328 if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
329 self._parser_state.error('invalid argument', token=argument)
330 return Atom(argument_index, operator.value, value)
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800331
332 # clause = atom , [ { '&&' , atom } ]
333 # ;
334 def _parse_clause(self, tokens):
335 atoms = []
336 while tokens:
337 atoms.append(self._parse_atom(tokens))
338 if not tokens or tokens[0].type != 'AND':
339 break
340 tokens.pop(0)
341 else:
342 self._parser_state.error('empty clause')
343 return atoms
344
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800345 # argument-expression = clause , [ { '||' , clause } ]
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800346 # ;
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800347 def parse_argument_expression(self, tokens):
348 """Parse a argument expression in Disjunctive Normal Form.
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800349
350 Since BPF disallows back jumps, we build the basic blocks in reverse
351 order so that all the jump targets are known by the time we need to
352 reference them.
353 """
354
355 clauses = []
356 while tokens:
357 clauses.append(self._parse_clause(tokens))
358 if not tokens or tokens[0].type != 'OR':
359 break
360 tokens.pop(0)
361 else:
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800362 self._parser_state.error('empty argument expression')
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800363 return clauses
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800364
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800365 # default-action = 'kill-process'
366 # | 'kill-thread'
367 # | 'kill'
368 # | 'trap'
369 # ;
370 def _parse_default_action(self, tokens):
371 if not tokens:
372 self._parser_state.error('missing default action')
373 action_token = tokens.pop(0)
374 if action_token.type != 'ACTION':
375 return self._parser_state.error(
376 'invalid default action', token=action_token)
377 if action_token.value == 'kill-process':
378 return bpf.KillProcess()
379 if action_token.value == 'kill-thread':
380 return bpf.KillThread()
381 if action_token.value == 'kill':
382 return self._kill_action
383 if action_token.value == 'trap':
384 return bpf.Trap()
385 return self._parser_state.error(
386 'invalid permissive default action', token=action_token)
387
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800388 # action = 'allow' | '1'
389 # | 'kill-process'
390 # | 'kill-thread'
391 # | 'kill'
392 # | 'trap'
393 # | 'trace'
394 # | 'log'
395 # | 'return' , single-constant
396 # ;
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800397 def parse_action(self, tokens):
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800398 if not tokens:
399 self._parser_state.error('missing action')
400 action_token = tokens.pop(0)
401 if action_token.type == 'ACTION':
402 if action_token.value == 'allow':
403 return bpf.Allow()
404 if action_token.value == 'kill':
405 return self._kill_action
406 if action_token.value == 'kill-process':
407 return bpf.KillProcess()
408 if action_token.value == 'kill-thread':
409 return bpf.KillThread()
410 if action_token.value == 'trap':
411 return bpf.Trap()
412 if action_token.value == 'trace':
413 return bpf.Trace()
414 if action_token.value == 'log':
415 return bpf.Log()
416 elif action_token.type == 'NUMERIC_CONSTANT':
417 constant = self._parse_single_constant(action_token)
418 if constant == 1:
419 return bpf.Allow()
420 elif action_token.type == 'RETURN':
421 if not tokens:
422 self._parser_state.error('missing return value')
423 return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
424 return self._parser_state.error('invalid action', token=action_token)
425
426 # single-filter = action
427 # | argument-expression , [ ';' , action ]
428 # ;
429 def _parse_single_filter(self, tokens):
430 if not tokens:
431 self._parser_state.error('missing filter')
432 if tokens[0].type == 'ARGUMENT':
433 # Only argument expressions can start with an ARGUMENT token.
434 argument_expression = self.parse_argument_expression(tokens)
435 if tokens and tokens[0].type == 'SEMICOLON':
436 tokens.pop(0)
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800437 action = self.parse_action(tokens)
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800438 else:
439 action = bpf.Allow()
440 return Filter(argument_expression, action)
441 else:
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800442 return Filter(None, self.parse_action(tokens))
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800443
444 # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
445 # | single-filter
446 # ;
447 def parse_filter(self, tokens):
448 """Parse a filter and return a list of Filter objects."""
449 if not tokens:
450 self._parser_state.error('missing filter')
451 filters = []
452 if tokens[0].type == 'LBRACE':
453 opening_brace = tokens.pop(0)
454 while tokens:
455 filters.append(self._parse_single_filter(tokens))
456 if not tokens or tokens[0].type != 'COMMA':
457 break
458 tokens.pop(0)
459 if not tokens or tokens[0].type != 'RBRACE':
460 self._parser_state.error('unclosed brace', token=opening_brace)
461 tokens.pop(0)
462 else:
463 filters.append(self._parse_single_filter(tokens))
464 return filters
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800465
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800466 # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
467 # ;
468 def _parse_key_value_pair(self, tokens):
469 if not tokens:
470 self._parser_state.error('missing key')
471 key = tokens.pop(0)
472 if key.type != 'IDENTIFIER':
473 self._parser_state.error('invalid key', token=key)
474 if not tokens:
475 self._parser_state.error('missing equal')
476 if tokens[0].type != 'EQUAL':
477 self._parser_state.error('invalid equal', token=tokens[0])
478 tokens.pop(0)
479 value_list = []
480 while tokens:
481 value = tokens.pop(0)
482 if value.type != 'IDENTIFIER':
483 self._parser_state.error('invalid value', token=value)
484 value_list.append(value.value)
485 if not tokens or tokens[0].type != 'COMMA':
486 break
487 tokens.pop(0)
488 else:
489 self._parser_state.error('empty value')
490 return (key.value, value_list)
491
492 # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
493 # ;
494 def _parse_metadata(self, tokens):
495 if not tokens:
496 self._parser_state.error('missing opening bracket')
497 opening_bracket = tokens.pop(0)
498 if opening_bracket.type != 'LBRACKET':
499 self._parser_state.error(
500 'invalid opening bracket', token=opening_bracket)
501 metadata = {}
502 while tokens:
503 first_token = tokens[0]
504 key, value = self._parse_key_value_pair(tokens)
505 if key in metadata:
506 self._parser_state.error(
507 'duplicate metadata key: "%s"' % key, token=first_token)
508 metadata[key] = value
509 if not tokens or tokens[0].type != 'SEMICOLON':
510 break
511 tokens.pop(0)
512 if not tokens or tokens[0].type != 'RBRACKET':
513 self._parser_state.error('unclosed bracket', token=opening_bracket)
514 tokens.pop(0)
515 return metadata
516
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800517 # syscall-descriptor = syscall-name , [ metadata ]
Luis Hector Chavez524da3b2019-03-05 16:44:08 -0800518 # | syscall-group-name , [ metadata ]
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800519 # ;
520 def _parse_syscall_descriptor(self, tokens):
521 if not tokens:
522 self._parser_state.error('missing syscall descriptor')
523 syscall_descriptor = tokens.pop(0)
Matt Delcof68fc8d2019-11-14 16:47:52 -0800524 # `kill` as a syscall name is a special case since kill is also a valid
525 # action and actions have precendence over identifiers.
526 if (syscall_descriptor.type != 'IDENTIFIER' and
527 syscall_descriptor.value != 'kill'):
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800528 self._parser_state.error(
529 'invalid syscall descriptor', token=syscall_descriptor)
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800530 if tokens and tokens[0].type == 'LBRACKET':
531 metadata = self._parse_metadata(tokens)
532 if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
533 return ()
Luis Hector Chavez524da3b2019-03-05 16:44:08 -0800534 if '@' in syscall_descriptor.value:
535 # This is a syscall group.
536 subtokens = syscall_descriptor.value.split('@')
537 if len(subtokens) != 2:
538 self._parser_state.error(
539 'invalid syscall group name', token=syscall_descriptor)
540 syscall_group_name, syscall_namespace_name = subtokens
541 if syscall_namespace_name not in self._arch.syscall_groups:
542 self._parser_state.error(
543 'nonexistent syscall group namespace',
544 token=syscall_descriptor)
545 syscall_namespace = self._arch.syscall_groups[
546 syscall_namespace_name]
547 if syscall_group_name not in syscall_namespace:
548 self._parser_state.error(
549 'nonexistent syscall group', token=syscall_descriptor)
550 return (Syscall(name, self._arch.syscalls[name])
551 for name in syscall_namespace[syscall_group_name])
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800552 if syscall_descriptor.value not in self._arch.syscalls:
553 self._parser_state.error(
554 'nonexistent syscall', token=syscall_descriptor)
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800555 return (Syscall(syscall_descriptor.value,
556 self._arch.syscalls[syscall_descriptor.value]), )
557
558 # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
559 # ':' , filter
560 # | syscall-descriptor , ':' , filter
561 # ;
562 def parse_filter_statement(self, tokens):
563 """Parse a filter statement and return a ParsedFilterStatement."""
564 if not tokens:
565 self._parser_state.error('empty filter statement')
566 syscall_descriptors = []
567 if tokens[0].type == 'LBRACE':
568 opening_brace = tokens.pop(0)
569 while tokens:
570 syscall_descriptors.extend(
571 self._parse_syscall_descriptor(tokens))
572 if not tokens or tokens[0].type != 'COMMA':
573 break
574 tokens.pop(0)
575 if not tokens or tokens[0].type != 'RBRACE':
576 self._parser_state.error('unclosed brace', token=opening_brace)
577 tokens.pop(0)
578 else:
579 syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
580 if not tokens:
581 self._parser_state.error('missing colon')
582 if tokens[0].type != 'COLON':
583 self._parser_state.error('invalid colon', token=tokens[0])
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800584 # Given that there can be multiple syscalls and filters in a single
585 # filter statement, use the colon token as the anchor for error location
586 # purposes.
587 colon_token = tokens.pop(0)
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800588 parsed_filter = self.parse_filter(tokens)
589 if not syscall_descriptors:
590 return None
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800591 return ParsedFilterStatement(
592 tuple(syscall_descriptors), parsed_filter, colon_token)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800593
594 # include-statement = '@include' , posix-path
595 # ;
596 def _parse_include_statement(self, tokens):
597 if not tokens:
598 self._parser_state.error('empty filter statement')
599 if tokens[0].type != 'INCLUDE':
600 self._parser_state.error('invalid include', token=tokens[0])
601 tokens.pop(0)
602 if not tokens:
603 self._parser_state.error('empty include path')
604 include_path = tokens.pop(0)
605 if include_path.type != 'PATH':
606 self._parser_state.error(
607 'invalid include path', token=include_path)
608 if len(self._parser_states) == self._include_depth_limit:
609 self._parser_state.error('@include statement nested too deep')
610 include_filename = os.path.normpath(
611 os.path.join(
612 os.path.dirname(self._parser_state.filename),
613 include_path.value))
614 if not os.path.isfile(include_filename):
615 self._parser_state.error(
616 'Could not @include %s' % include_filename, token=include_path)
617 return self._parse_policy_file(include_filename)
618
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800619 def _parse_frequency_file(self, filename):
620 self._parser_states.append(ParserState(filename))
621 try:
622 frequency_mapping = collections.defaultdict(int)
623 with open(filename) as frequency_file:
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700624 for tokens in self._parser_state.tokenize(frequency_file):
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800625 syscall_numbers = self._parse_syscall_descriptor(tokens)
626 if not tokens:
627 self._parser_state.error('missing colon')
628 if tokens[0].type != 'COLON':
629 self._parser_state.error(
630 'invalid colon', token=tokens[0])
631 tokens.pop(0)
632
633 if not tokens:
634 self._parser_state.error('missing number')
635 number = tokens.pop(0)
636 if number.type != 'NUMERIC_CONSTANT':
637 self._parser_state.error(
638 'invalid number', token=number)
639 number_value = int(number.value, base=0)
640 if number_value < 0:
641 self._parser_state.error(
642 'invalid number', token=number)
643
644 for syscall_number in syscall_numbers:
645 frequency_mapping[syscall_number] += number_value
646 return frequency_mapping
647 finally:
648 self._parser_states.pop()
649
650 # frequency-statement = '@frequency' , posix-path
651 # ;
652 def _parse_frequency_statement(self, tokens):
653 if not tokens:
654 self._parser_state.error('empty frequency statement')
655 if tokens[0].type != 'FREQUENCY':
656 self._parser_state.error('invalid frequency', token=tokens[0])
657 tokens.pop(0)
658 if not tokens:
659 self._parser_state.error('empty frequency path')
660 frequency_path = tokens.pop(0)
661 if frequency_path.type != 'PATH':
662 self._parser_state.error(
663 'invalid frequency path', token=frequency_path)
664 frequency_filename = os.path.normpath(
665 os.path.join(
666 os.path.dirname(self._parser_state.filename),
667 frequency_path.value))
668 if not os.path.isfile(frequency_filename):
669 self._parser_state.error(
670 'Could not open frequency file %s' % frequency_filename,
671 token=frequency_path)
672 return self._parse_frequency_file(frequency_filename)
673
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800674 # default-statement = '@default' , default-action
675 # ;
676 def _parse_default_statement(self, tokens):
677 if not tokens:
678 self._parser_state.error('empty default statement')
679 if tokens[0].type != 'DEFAULT':
680 self._parser_state.error('invalid default', token=tokens[0])
681 tokens.pop(0)
682 if not tokens:
683 self._parser_state.error('empty action')
684 return self._parse_default_action(tokens)
685
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800686 def _parse_policy_file(self, filename):
687 self._parser_states.append(ParserState(filename))
688 try:
689 statements = []
690 with open(filename) as policy_file:
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700691 for tokens in self._parser_state.tokenize(policy_file):
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800692 if tokens[0].type == 'INCLUDE':
693 statements.extend(
694 self._parse_include_statement(tokens))
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800695 elif tokens[0].type == 'FREQUENCY':
696 for syscall_number, frequency in self._parse_frequency_statement(
697 tokens).items():
698 self._frequency_mapping[
699 syscall_number] += frequency
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800700 elif tokens[0].type == 'DEFAULT':
701 self._default_action = self._parse_default_statement(
702 tokens)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800703 else:
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800704 statement = self.parse_filter_statement(tokens)
705 if statement is None:
706 # If all the syscalls in the statement are for
707 # another arch, skip the whole statement.
708 continue
709 statements.append(statement)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800710
711 if tokens:
712 self._parser_state.error(
713 'extra tokens', token=tokens[0])
714 return statements
715 finally:
716 self._parser_states.pop()
717
718 def parse_file(self, filename):
719 """Parse a file and return the list of FilterStatements."""
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800720 self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800721 try:
722 statements = [x for x in self._parse_policy_file(filename)]
723 except RecursionError:
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700724 raise ParseException(
725 'recursion limit exceeded',
726 filename,
727 line=self._parser_states[-1].line)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800728
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800729 # Collapse statements into a single syscall-to-filter-list, remembering
730 # the token for each filter for better diagnostics.
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800731 syscall_filter_mapping = {}
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800732 syscall_filter_definitions = {}
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800733 filter_statements = []
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800734 for syscalls, filters, token in statements:
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800735 for syscall in syscalls:
736 if syscall not in syscall_filter_mapping:
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800737 filter_statements.append(
738 FilterStatement(
739 syscall, self._frequency_mapping.get(syscall, 1),
740 []))
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800741 syscall_filter_mapping[syscall] = filter_statements[-1]
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800742 syscall_filter_definitions[syscall] = []
743 for filt in filters:
744 syscall_filter_mapping[syscall].filters.append(filt)
745 syscall_filter_definitions[syscall].append(token)
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800746 default_action = self._override_default_action or self._default_action
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800747 for filter_statement in filter_statements:
748 unconditional_actions_suffix = list(
749 itertools.dropwhile(lambda filt: filt.expression is not None,
750 filter_statement.filters))
751 if len(unconditional_actions_suffix) == 1:
752 # The last filter already has an unconditional action, no need
753 # to add another one.
754 continue
755 if len(unconditional_actions_suffix) > 1:
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800756 previous_definition_token = syscall_filter_definitions[
757 filter_statement.syscall][
758 -len(unconditional_actions_suffix)]
759 current_definition_token = syscall_filter_definitions[
760 filter_statement.syscall][
761 -len(unconditional_actions_suffix) + 1]
Luis Hector Chavezd0fd13d2019-03-17 19:01:52 -0700762 raise ParseException(
763 ('Syscall %s (number %d) already had '
764 'an unconditional action applied') %
765 (filter_statement.syscall.name,
766 filter_statement.syscall.number),
Luis Hector Chavez4228eff2019-12-11 19:07:13 -0800767 filename=current_definition_token.filename,
768 token=current_definition_token) from ParseException(
769 'Previous definition',
770 filename=previous_definition_token.filename,
771 token=previous_definition_token)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800772 assert not unconditional_actions_suffix
773 filter_statement.filters.append(
Luis Hector Chavez891d3552019-03-09 18:46:53 -0800774 Filter(expression=None, action=default_action))
775 return ParsedPolicy(default_action, filter_statements)