blob: 41d2f52569018ad19ee08b8244a891958cb358c9 [file] [log] [blame]
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -08001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A parser for the Minijail policy file."""
18
19from __future__ import absolute_import
20from __future__ import division
21from __future__ import print_function
22
23import collections
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -080024import itertools
25import os.path
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -080026import re
27
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -080028import bpf
29
# A lexical token: its kind, the matched text, and where it was found. The
# tokenizer stores the 1-based line number in `line` and the 0-based offset
# of the match within the line in `column`.
Token = collections.namedtuple(
    'token', ('type', 'value', 'filename', 'line', 'column'))
32
# Token kinds and their patterns. They are joined below into a single
# alternation of named groups that can tokenize a Minijail policy file line.
#
# Order matters: the regex engine takes the first alternative that matches, so
# keywords are listed before the catch-all IDENTIFIER.
#
# Word-like keywords are anchored with \b so that they only match whole words.
# Without the anchors a syscall name such as `inotify_init` would be split
# into the operator `in` followed by the identifier `otify_init`.
_TOKEN_SPECIFICATION = (
    ('COMMENT', r'#.*$'),
    ('WHITESPACE', r'\s+'),
    ('DEFAULT', r'@default'),
    ('INCLUDE', r'@include'),
    ('FREQUENCY', r'@frequency'),
    ('PATH', r'(?:\.)?/\S+'),
    ('NUMERIC_CONSTANT', r'-?0[xX][0-9a-fA-F]+|-?0[Oo][0-7]+|-?[0-9]+'),
    ('COLON', r':'),
    ('SEMICOLON', r';'),
    ('COMMA', r','),
    ('BITWISE_COMPLEMENT', r'~'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('RBRACKET', r'\]'),
    ('LBRACKET', r'\['),
    ('OR', r'\|\|'),
    ('AND', r'&&'),
    ('BITWISE_OR', r'\|'),
    ('OP', r'&|\bin\b|==|!=|<=|<|>=|>'),
    ('EQUAL', r'='),
    ('ARGUMENT', r'\barg[0-9]+\b'),
    ('RETURN', r'\breturn\b'),
    ('ACTION',
     r'\ballow\b|\bkill-process\b|\bkill-thread\b|\bkill\b|\btrap\b|'
     r'\btrace\b|\blog\b'),
    ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9@]*'),
)
_TOKEN_RE = re.compile('|'.join(
    r'(?P<%s>%s)' % pair for pair in _TOKEN_SPECIFICATION))
64
65
class ParseException(Exception):
    """Raised when a policy cannot be parsed.

    The message starts with `filename(line:column): message`, where the column
    is reported 1-based, and is followed by the offending line and a row of
    carets marking the reported span.
    """

    # pylint: disable=too-many-arguments
    def __init__(self, message, filename, line, line_number=1, token=None):
        # With a token the carets underline it; without one a single caret is
        # placed just past the end of the line.
        column, length = ((token.column, len(token.value))
                          if token else (len(line), 1))
        header = '%s(%d:%d): %s' % (filename, line_number, column + 1,
                                    message)
        source = '\n %s' % line
        marker = '\n %s%s' % (' ' * column, '^' * length)
        super().__init__(header + source + marker)
83
84
class ParserState:
    """Tracks the file, line and line number being parsed, for diagnostics."""

    def __init__(self, filename):
        self._filename = filename
        # Nothing has been read yet; the first set_line() makes this 1.
        self._line_number = 0
        self._line = ''

    @property
    def filename(self):
        """Return the name of the file being processed."""
        return self._filename

    @property
    def line(self):
        """Return the text of the line currently being processed."""
        return self._line

    @property
    def line_number(self):
        """Return the number of the line currently being processed."""
        return self._line_number

    def set_line(self, line):
        """Make `line` the current line and advance the line counter by one."""
        self._line_number += 1
        self._line = line

    def error(self, message, token=None):
        """Raise a ParseException located at the current line.

        If `token` is given, the exception points at that token.
        """
        raise ParseException(message, self.filename, self.line,
                             self.line_number, token)

    def _reject_span(self, start, end):
        """Report the text in [start, end) of the current line as invalid."""
        self.error(
            'invalid token',
            token=Token('INVALID', self.line[start:end], self.filename,
                        self.line_number, start))

    def tokenize(self):
        """Return the list of tokens found in the current line.

        Whitespace and comments are dropped here so that no other code has to
        deal with them. Any stretch of text that no token pattern matches is
        reported through error().
        """
        text = self.line
        tokens = []
        consumed = 0
        for match in _TOKEN_RE.finditer(text):
            begin = match.start()
            if begin != consumed:
                # finditer() skipped over text that matched no pattern.
                self._reject_span(consumed, begin)
            consumed = match.end()

            kind = match.lastgroup
            if kind not in ('WHITESPACE', 'COMMENT'):
                tokens.append(
                    Token(kind, match.group(), self.filename,
                          self.line_number, begin))
        if consumed != len(text):
            self._reject_span(consumed, len(text))
        return tokens
144
145
Atom = collections.namedtuple(
    'Atom', ('argument_index', 'op', 'value'))
"""One boolean comparison inside a filter expression."""

Filter = collections.namedtuple(
    'Filter', ('expression', 'action'))
"""A parsed DNF filter expression together with its action.

The expression is in Disjunctive Normal Form and is therefore stored as a list
of lists: the outer list holds the disjunctions, each inner list holds the
Atoms of one conjunction.
"""

Syscall = collections.namedtuple(
    'Syscall', ('name', 'number'))
"""A system call."""

ParsedFilterStatement = collections.namedtuple(
    'ParsedFilterStatement', ('syscalls', 'filters'))
"""The outcome of parsing one filter statement.

A statement lists one or more syscalls plus the filters that are evaluated, in
order, whenever any of those syscalls is invoked.
"""

FilterStatement = collections.namedtuple(
    'FilterStatement', ('syscall', 'frequency', 'filters'))
"""All filters that apply to one syscall.

Maps a single syscall to the filters that are evaluated sequentially for it.
The last filter is always an unconditional action.
"""

ParsedPolicy = collections.namedtuple(
    'ParsedPolicy', ('default_action', 'filter_statements'))
"""The outcome of parsing a minijail .policy file."""
179
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800180
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800181# pylint: disable=too-few-public-methods
182class PolicyParser:
183 """A parser for the Minijail seccomp policy file format."""
184
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800185 def __init__(self, arch, *, kill_action, include_depth_limit=10):
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800186 self._parser_states = [ParserState("<memory>")]
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800187 self._kill_action = kill_action
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800188 self._include_depth_limit = include_depth_limit
189 self._default_action = self._kill_action
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800190 self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavezd4ce4492018-12-04 20:00:32 -0800191 self._arch = arch
192
193 @property
194 def _parser_state(self):
195 return self._parser_states[-1]
196
197 # single-constant = identifier
198 # | numeric-constant
199 # ;
200 def _parse_single_constant(self, token):
201 if token.type == 'IDENTIFIER':
202 if token.value not in self._arch.constants:
203 self._parser_state.error('invalid constant', token=token)
204 single_constant = self._arch.constants[token.value]
205 elif token.type == 'NUMERIC_CONSTANT':
206 try:
207 single_constant = int(token.value, base=0)
208 except ValueError:
209 self._parser_state.error('invalid constant', token=token)
210 else:
211 self._parser_state.error('invalid constant', token=token)
212 if single_constant > self._arch.max_unsigned:
213 self._parser_state.error('unsigned overflow', token=token)
214 elif single_constant < self._arch.min_signed:
215 self._parser_state.error('signed underflow', token=token)
216 elif single_constant < 0:
217 # This converts the constant to an unsigned representation of the
218 # same value, since BPF only uses unsigned values.
219 single_constant = self._arch.truncate_word(single_constant)
220 return single_constant
221
222 # constant = [ '~' ] , '(' , value , ')'
223 # | [ '~' ] , single-constant
224 # ;
225 def _parse_constant(self, tokens):
226 negate = False
227 if tokens[0].type == 'BITWISE_COMPLEMENT':
228 negate = True
229 tokens.pop(0)
230 if not tokens:
231 self._parser_state.error('empty complement')
232 if tokens[0].type == 'BITWISE_COMPLEMENT':
233 self._parser_state.error(
234 'invalid double complement', token=tokens[0])
235 if tokens[0].type == 'LPAREN':
236 last_open_paren = tokens.pop(0)
237 single_value = self.parse_value(tokens)
238 if not tokens or tokens[0].type != 'RPAREN':
239 self._parser_state.error(
240 'unclosed parenthesis', token=last_open_paren)
241 else:
242 single_value = self._parse_single_constant(tokens[0])
243 tokens.pop(0)
244 if negate:
245 single_value = self._arch.truncate_word(~single_value)
246 return single_value
247
248 # value = constant , [ { '|' , constant } ]
249 # ;
250 def parse_value(self, tokens):
251 """Parse constants separated bitwise OR operator |.
252
253 Constants can be:
254
255 - A number that can be parsed with int(..., base=0)
256 - A named constant expression.
257 - A parenthesized, valid constant expression.
258 - A valid constant expression prefixed with the unary bitwise
259 complement operator ~.
260 - A series of valid constant expressions separated by bitwise
261 OR operator |.
262
263 If there is an error parsing any of the constants, the whole process
264 fails.
265 """
266
267 value = 0
268 while tokens:
269 value |= self._parse_constant(tokens)
270 if not tokens or tokens[0].type != 'BITWISE_OR':
271 break
272 tokens.pop(0)
273 else:
274 self._parser_state.error('empty constant')
275 return value
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800276
277 # atom = argument , op , value
278 # ;
279 def _parse_atom(self, tokens):
280 if not tokens:
281 self._parser_state.error('missing argument')
282 argument = tokens.pop(0)
283 if argument.type != 'ARGUMENT':
284 self._parser_state.error('invalid argument', token=argument)
285
286 if not tokens:
287 self._parser_state.error('missing operator')
288 operator = tokens.pop(0)
289 if operator.type != 'OP':
290 self._parser_state.error('invalid operator', token=operator)
291
292 value = self.parse_value(tokens)
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800293 argument_index = int(argument.value[3:])
294 if not (0 <= argument_index < bpf.MAX_SYSCALL_ARGUMENTS):
295 self._parser_state.error('invalid argument', token=argument)
296 return Atom(argument_index, operator.value, value)
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800297
298 # clause = atom , [ { '&&' , atom } ]
299 # ;
300 def _parse_clause(self, tokens):
301 atoms = []
302 while tokens:
303 atoms.append(self._parse_atom(tokens))
304 if not tokens or tokens[0].type != 'AND':
305 break
306 tokens.pop(0)
307 else:
308 self._parser_state.error('empty clause')
309 return atoms
310
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800311 # argument-expression = clause , [ { '||' , clause } ]
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800312 # ;
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800313 def parse_argument_expression(self, tokens):
314 """Parse a argument expression in Disjunctive Normal Form.
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800315
316 Since BPF disallows back jumps, we build the basic blocks in reverse
317 order so that all the jump targets are known by the time we need to
318 reference them.
319 """
320
321 clauses = []
322 while tokens:
323 clauses.append(self._parse_clause(tokens))
324 if not tokens or tokens[0].type != 'OR':
325 break
326 tokens.pop(0)
327 else:
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800328 self._parser_state.error('empty argument expression')
Luis Hector Chavez0516e182018-12-04 20:36:00 -0800329 return clauses
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800330
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800331 # default-action = 'kill-process'
332 # | 'kill-thread'
333 # | 'kill'
334 # | 'trap'
335 # ;
336 def _parse_default_action(self, tokens):
337 if not tokens:
338 self._parser_state.error('missing default action')
339 action_token = tokens.pop(0)
340 if action_token.type != 'ACTION':
341 return self._parser_state.error(
342 'invalid default action', token=action_token)
343 if action_token.value == 'kill-process':
344 return bpf.KillProcess()
345 if action_token.value == 'kill-thread':
346 return bpf.KillThread()
347 if action_token.value == 'kill':
348 return self._kill_action
349 if action_token.value == 'trap':
350 return bpf.Trap()
351 return self._parser_state.error(
352 'invalid permissive default action', token=action_token)
353
Luis Hector Chavez1ac9eca2018-12-04 21:28:52 -0800354 # action = 'allow' | '1'
355 # | 'kill-process'
356 # | 'kill-thread'
357 # | 'kill'
358 # | 'trap'
359 # | 'trace'
360 # | 'log'
361 # | 'return' , single-constant
362 # ;
363 def _parse_action(self, tokens):
364 if not tokens:
365 self._parser_state.error('missing action')
366 action_token = tokens.pop(0)
367 if action_token.type == 'ACTION':
368 if action_token.value == 'allow':
369 return bpf.Allow()
370 if action_token.value == 'kill':
371 return self._kill_action
372 if action_token.value == 'kill-process':
373 return bpf.KillProcess()
374 if action_token.value == 'kill-thread':
375 return bpf.KillThread()
376 if action_token.value == 'trap':
377 return bpf.Trap()
378 if action_token.value == 'trace':
379 return bpf.Trace()
380 if action_token.value == 'log':
381 return bpf.Log()
382 elif action_token.type == 'NUMERIC_CONSTANT':
383 constant = self._parse_single_constant(action_token)
384 if constant == 1:
385 return bpf.Allow()
386 elif action_token.type == 'RETURN':
387 if not tokens:
388 self._parser_state.error('missing return value')
389 return bpf.ReturnErrno(self._parse_single_constant(tokens.pop(0)))
390 return self._parser_state.error('invalid action', token=action_token)
391
392 # single-filter = action
393 # | argument-expression , [ ';' , action ]
394 # ;
395 def _parse_single_filter(self, tokens):
396 if not tokens:
397 self._parser_state.error('missing filter')
398 if tokens[0].type == 'ARGUMENT':
399 # Only argument expressions can start with an ARGUMENT token.
400 argument_expression = self.parse_argument_expression(tokens)
401 if tokens and tokens[0].type == 'SEMICOLON':
402 tokens.pop(0)
403 action = self._parse_action(tokens)
404 else:
405 action = bpf.Allow()
406 return Filter(argument_expression, action)
407 else:
408 return Filter(None, self._parse_action(tokens))
409
410 # filter = '{' , single-filter , [ { ',' , single-filter } ] , '}'
411 # | single-filter
412 # ;
413 def parse_filter(self, tokens):
414 """Parse a filter and return a list of Filter objects."""
415 if not tokens:
416 self._parser_state.error('missing filter')
417 filters = []
418 if tokens[0].type == 'LBRACE':
419 opening_brace = tokens.pop(0)
420 while tokens:
421 filters.append(self._parse_single_filter(tokens))
422 if not tokens or tokens[0].type != 'COMMA':
423 break
424 tokens.pop(0)
425 if not tokens or tokens[0].type != 'RBRACE':
426 self._parser_state.error('unclosed brace', token=opening_brace)
427 tokens.pop(0)
428 else:
429 filters.append(self._parse_single_filter(tokens))
430 return filters
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800431
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800432 # key-value-pair = identifier , '=', identifier , [ { ',' , identifier } ]
433 # ;
434 def _parse_key_value_pair(self, tokens):
435 if not tokens:
436 self._parser_state.error('missing key')
437 key = tokens.pop(0)
438 if key.type != 'IDENTIFIER':
439 self._parser_state.error('invalid key', token=key)
440 if not tokens:
441 self._parser_state.error('missing equal')
442 if tokens[0].type != 'EQUAL':
443 self._parser_state.error('invalid equal', token=tokens[0])
444 tokens.pop(0)
445 value_list = []
446 while tokens:
447 value = tokens.pop(0)
448 if value.type != 'IDENTIFIER':
449 self._parser_state.error('invalid value', token=value)
450 value_list.append(value.value)
451 if not tokens or tokens[0].type != 'COMMA':
452 break
453 tokens.pop(0)
454 else:
455 self._parser_state.error('empty value')
456 return (key.value, value_list)
457
458 # metadata = '[' , key-value-pair , [ { ';' , key-value-pair } ] , ']'
459 # ;
460 def _parse_metadata(self, tokens):
461 if not tokens:
462 self._parser_state.error('missing opening bracket')
463 opening_bracket = tokens.pop(0)
464 if opening_bracket.type != 'LBRACKET':
465 self._parser_state.error(
466 'invalid opening bracket', token=opening_bracket)
467 metadata = {}
468 while tokens:
469 first_token = tokens[0]
470 key, value = self._parse_key_value_pair(tokens)
471 if key in metadata:
472 self._parser_state.error(
473 'duplicate metadata key: "%s"' % key, token=first_token)
474 metadata[key] = value
475 if not tokens or tokens[0].type != 'SEMICOLON':
476 break
477 tokens.pop(0)
478 if not tokens or tokens[0].type != 'RBRACKET':
479 self._parser_state.error('unclosed bracket', token=opening_bracket)
480 tokens.pop(0)
481 return metadata
482
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800483 # syscall-descriptor = syscall-name , [ metadata ]
484 # | libc-function , [ metadata ]
485 # ;
486 def _parse_syscall_descriptor(self, tokens):
487 if not tokens:
488 self._parser_state.error('missing syscall descriptor')
489 syscall_descriptor = tokens.pop(0)
490 if syscall_descriptor.type != 'IDENTIFIER':
491 self._parser_state.error(
492 'invalid syscall descriptor', token=syscall_descriptor)
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800493 # TODO(lhchavez): Support libc function names.
494 if tokens and tokens[0].type == 'LBRACKET':
495 metadata = self._parse_metadata(tokens)
496 if 'arch' in metadata and self._arch.arch_name not in metadata['arch']:
497 return ()
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800498 if syscall_descriptor.value not in self._arch.syscalls:
499 self._parser_state.error(
500 'nonexistent syscall', token=syscall_descriptor)
Luis Hector Chavez75406c22018-12-04 21:57:06 -0800501 return (Syscall(syscall_descriptor.value,
502 self._arch.syscalls[syscall_descriptor.value]), )
503
504 # filter-statement = '{' , syscall-descriptor , [ { ',', syscall-descriptor } ] , '}' ,
505 # ':' , filter
506 # | syscall-descriptor , ':' , filter
507 # ;
508 def parse_filter_statement(self, tokens):
509 """Parse a filter statement and return a ParsedFilterStatement."""
510 if not tokens:
511 self._parser_state.error('empty filter statement')
512 syscall_descriptors = []
513 if tokens[0].type == 'LBRACE':
514 opening_brace = tokens.pop(0)
515 while tokens:
516 syscall_descriptors.extend(
517 self._parse_syscall_descriptor(tokens))
518 if not tokens or tokens[0].type != 'COMMA':
519 break
520 tokens.pop(0)
521 if not tokens or tokens[0].type != 'RBRACE':
522 self._parser_state.error('unclosed brace', token=opening_brace)
523 tokens.pop(0)
524 else:
525 syscall_descriptors.extend(self._parse_syscall_descriptor(tokens))
526 if not tokens:
527 self._parser_state.error('missing colon')
528 if tokens[0].type != 'COLON':
529 self._parser_state.error('invalid colon', token=tokens[0])
530 tokens.pop(0)
531 parsed_filter = self.parse_filter(tokens)
532 if not syscall_descriptors:
533 return None
534 return ParsedFilterStatement(tuple(syscall_descriptors), parsed_filter)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800535
536 # include-statement = '@include' , posix-path
537 # ;
538 def _parse_include_statement(self, tokens):
539 if not tokens:
540 self._parser_state.error('empty filter statement')
541 if tokens[0].type != 'INCLUDE':
542 self._parser_state.error('invalid include', token=tokens[0])
543 tokens.pop(0)
544 if not tokens:
545 self._parser_state.error('empty include path')
546 include_path = tokens.pop(0)
547 if include_path.type != 'PATH':
548 self._parser_state.error(
549 'invalid include path', token=include_path)
550 if len(self._parser_states) == self._include_depth_limit:
551 self._parser_state.error('@include statement nested too deep')
552 include_filename = os.path.normpath(
553 os.path.join(
554 os.path.dirname(self._parser_state.filename),
555 include_path.value))
556 if not os.path.isfile(include_filename):
557 self._parser_state.error(
558 'Could not @include %s' % include_filename, token=include_path)
559 return self._parse_policy_file(include_filename)
560
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800561 def _parse_frequency_file(self, filename):
562 self._parser_states.append(ParserState(filename))
563 try:
564 frequency_mapping = collections.defaultdict(int)
565 with open(filename) as frequency_file:
566 for line in frequency_file:
567 self._parser_state.set_line(line.rstrip())
568 tokens = self._parser_state.tokenize()
569
570 if not tokens:
571 continue
572
573 syscall_numbers = self._parse_syscall_descriptor(tokens)
574 if not tokens:
575 self._parser_state.error('missing colon')
576 if tokens[0].type != 'COLON':
577 self._parser_state.error(
578 'invalid colon', token=tokens[0])
579 tokens.pop(0)
580
581 if not tokens:
582 self._parser_state.error('missing number')
583 number = tokens.pop(0)
584 if number.type != 'NUMERIC_CONSTANT':
585 self._parser_state.error(
586 'invalid number', token=number)
587 number_value = int(number.value, base=0)
588 if number_value < 0:
589 self._parser_state.error(
590 'invalid number', token=number)
591
592 for syscall_number in syscall_numbers:
593 frequency_mapping[syscall_number] += number_value
594 return frequency_mapping
595 finally:
596 self._parser_states.pop()
597
598 # frequency-statement = '@frequency' , posix-path
599 # ;
600 def _parse_frequency_statement(self, tokens):
601 if not tokens:
602 self._parser_state.error('empty frequency statement')
603 if tokens[0].type != 'FREQUENCY':
604 self._parser_state.error('invalid frequency', token=tokens[0])
605 tokens.pop(0)
606 if not tokens:
607 self._parser_state.error('empty frequency path')
608 frequency_path = tokens.pop(0)
609 if frequency_path.type != 'PATH':
610 self._parser_state.error(
611 'invalid frequency path', token=frequency_path)
612 frequency_filename = os.path.normpath(
613 os.path.join(
614 os.path.dirname(self._parser_state.filename),
615 frequency_path.value))
616 if not os.path.isfile(frequency_filename):
617 self._parser_state.error(
618 'Could not open frequency file %s' % frequency_filename,
619 token=frequency_path)
620 return self._parse_frequency_file(frequency_filename)
621
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800622 # default-statement = '@default' , default-action
623 # ;
624 def _parse_default_statement(self, tokens):
625 if not tokens:
626 self._parser_state.error('empty default statement')
627 if tokens[0].type != 'DEFAULT':
628 self._parser_state.error('invalid default', token=tokens[0])
629 tokens.pop(0)
630 if not tokens:
631 self._parser_state.error('empty action')
632 return self._parse_default_action(tokens)
633
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800634 def _parse_policy_file(self, filename):
635 self._parser_states.append(ParserState(filename))
636 try:
637 statements = []
638 with open(filename) as policy_file:
639 for line in policy_file:
640 self._parser_state.set_line(line.rstrip())
641 tokens = self._parser_state.tokenize()
642
643 if not tokens:
644 # Allow empty lines.
645 continue
646
647 if tokens[0].type == 'INCLUDE':
648 statements.extend(
649 self._parse_include_statement(tokens))
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800650 elif tokens[0].type == 'FREQUENCY':
651 for syscall_number, frequency in self._parse_frequency_statement(
652 tokens).items():
653 self._frequency_mapping[
654 syscall_number] += frequency
Luis Hector Chavezb7926be2018-12-05 16:54:26 -0800655 elif tokens[0].type == 'DEFAULT':
656 self._default_action = self._parse_default_statement(
657 tokens)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800658 else:
Luis Hector Chavez571e9582018-12-05 09:44:00 -0800659 statement = self.parse_filter_statement(tokens)
660 if statement is None:
661 # If all the syscalls in the statement are for
662 # another arch, skip the whole statement.
663 continue
664 statements.append(statement)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800665
666 if tokens:
667 self._parser_state.error(
668 'extra tokens', token=tokens[0])
669 return statements
670 finally:
671 self._parser_states.pop()
672
673 def parse_file(self, filename):
674 """Parse a file and return the list of FilterStatements."""
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800675 self._frequency_mapping = collections.defaultdict(int)
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800676 try:
677 statements = [x for x in self._parse_policy_file(filename)]
678 except RecursionError:
679 raise ParseException('recursion limit exceeded', filename,
680 self._parser_states[-1].line)
681
682 # Collapse statements into a single syscall-to-filter-list.
683 syscall_filter_mapping = {}
684 filter_statements = []
685 for syscalls, filters in statements:
686 for syscall in syscalls:
687 if syscall not in syscall_filter_mapping:
Luis Hector Chavezb4408862018-12-05 16:54:16 -0800688 filter_statements.append(
689 FilterStatement(
690 syscall, self._frequency_mapping.get(syscall, 1),
691 []))
Luis Hector Chavez5dfe9192018-12-04 22:38:43 -0800692 syscall_filter_mapping[syscall] = filter_statements[-1]
693 syscall_filter_mapping[syscall].filters.extend(filters)
694 for filter_statement in filter_statements:
695 unconditional_actions_suffix = list(
696 itertools.dropwhile(lambda filt: filt.expression is not None,
697 filter_statement.filters))
698 if len(unconditional_actions_suffix) == 1:
699 # The last filter already has an unconditional action, no need
700 # to add another one.
701 continue
702 if len(unconditional_actions_suffix) > 1:
703 raise ParseException(('Syscall %s (number %d) already had '
704 'an unconditional action applied') %
705 (filter_statement.syscall.name,
706 filter_statement.syscall.number),
707 filename, self._parser_states[-1].line)
708 assert not unconditional_actions_suffix
709 filter_statement.filters.append(
710 Filter(expression=None, action=self._default_action))
711 return ParsedPolicy(self._default_action, filter_statements)