Luis Hector Chavez | 05392b8 | 2018-10-28 21:40:10 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # -*- coding: utf-8 -*- |
| 3 | # |
| 4 | # Copyright (C) 2018 The Android Open Source Project |
| 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | # you may not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
| 17 | """A BPF compiler for the Minijail policy file.""" |
| 18 | |
| 19 | from __future__ import print_function |
| 20 | |
Luis Hector Chavez | 7a21ffe | 2018-12-05 16:54:30 -0800 | [diff] [blame^] | 21 | import enum |
| 22 | |
Luis Hector Chavez | 05392b8 | 2018-10-28 21:40:10 -0700 | [diff] [blame] | 23 | import bpf |
| 24 | import parser # pylint: disable=wrong-import-order |
| 25 | |
| 26 | |
Luis Hector Chavez | 7a21ffe | 2018-12-05 16:54:30 -0800 | [diff] [blame^] | 27 | class OptimizationStrategy(enum.Enum): |
| 28 | """The available optimization strategies.""" |
| 29 | |
| 30 | # Generate a linear chain of syscall number checks. Works best for policies |
| 31 | # with very few syscalls. |
| 32 | LINEAR = 'linear' |
| 33 | |
| 34 | # Generate a binary search tree for the syscalls. Works best for policies |
| 35 | # with a lot of syscalls, where no one syscall dominates. |
| 36 | BST = 'bst' |
| 37 | |
| 38 | def __str__(self): |
| 39 | return self.value |
| 40 | |
| 41 | |
Luis Hector Chavez | 05392b8 | 2018-10-28 21:40:10 -0700 | [diff] [blame] | 42 | class SyscallPolicyEntry: |
| 43 | """The parsed version of a seccomp policy line.""" |
| 44 | |
| 45 | def __init__(self, name, number, frequency): |
| 46 | self.name = name |
| 47 | self.number = number |
| 48 | self.frequency = frequency |
| 49 | self.accumulated = 0 |
| 50 | self.filter = None |
| 51 | |
| 52 | def __repr__(self): |
| 53 | return ('SyscallPolicyEntry<name: %s, number: %d, ' |
| 54 | 'frequency: %d, filter: %r>') % (self.name, self.number, |
| 55 | self.frequency, |
| 56 | self.filter.instructions |
| 57 | if self.filter else None) |
| 58 | |
| 59 | def simulate(self, arch, syscall_number, *args): |
| 60 | """Simulate the policy with the given arguments.""" |
| 61 | if not self.filter: |
| 62 | return (0, 'ALLOW') |
| 63 | return bpf.simulate(self.filter.instructions, arch, syscall_number, |
| 64 | *args) |
| 65 | |
| 66 | |
Luis Hector Chavez | 7a21ffe | 2018-12-05 16:54:30 -0800 | [diff] [blame^] | 67 | def _compile_entries_linear(entries, accept_action, reject_action, visitor): |
| 68 | # Compiles the list of entries into a simple linear list of comparisons. In |
| 69 | # order to make the generated code a bit more efficient, we sort the |
| 70 | # entries by frequency, so that the most frequently-called syscalls appear |
| 71 | # earlier in the chain. |
| 72 | next_action = reject_action |
| 73 | entries.sort(key=lambda e: -e.frequency) |
| 74 | for entry in entries[::-1]: |
| 75 | if entry.filter: |
| 76 | next_action = bpf.SyscallEntry(entry.number, entry.filter, |
| 77 | next_action) |
| 78 | entry.filter.accept(visitor) |
| 79 | else: |
| 80 | next_action = bpf.SyscallEntry(entry.number, accept_action, |
| 81 | next_action) |
| 82 | return next_action |
| 83 | |
| 84 | |
| 85 | def _compile_entries_bst(entries, accept_action, reject_action, visitor): |
| 86 | # Instead of generating a linear list of comparisons, this method generates |
| 87 | # a binary search tree. |
| 88 | # |
| 89 | # Even though we are going to perform a binary search over the syscall |
| 90 | # number, we would still like to rotate some of the internal nodes of the |
| 91 | # binary search tree so that more frequently-used syscalls can be accessed |
| 92 | # more cheaply (i.e. fewer internal nodes need to be traversed to reach |
| 93 | # them). |
| 94 | # |
| 95 | # The overall idea then is to, at any step, instead of naively partitioning |
| 96 | # the list of syscalls by the midpoint of the interval, we choose a |
| 97 | # midpoint that minimizes the difference of the sum of all frequencies |
| 98 | # between the left and right subtrees. For that, we need to sort the |
| 99 | # entries by syscall number and keep track of the accumulated frequency of |
| 100 | # all entries prior to the current one so that we can compue the midpoint |
| 101 | # efficiently. |
| 102 | # |
| 103 | # TODO(lhchavez): There is one further possible optimization, which is to |
| 104 | # hoist any syscalls that are more frequent than all other syscalls in the |
| 105 | # BST combined into a linear chain before entering the BST. |
| 106 | entries.sort(key=lambda e: e.number) |
| 107 | accumulated = 0 |
| 108 | for entry in entries: |
| 109 | accumulated += entry.frequency |
| 110 | entry.accumulated = accumulated |
| 111 | |
| 112 | # Recursively create the internal nodes. |
| 113 | def _generate_syscall_bst(entries, lower_bound=0, upper_bound=2**64 - 1): |
| 114 | assert entries |
| 115 | if len(entries) == 1: |
| 116 | # This is a single entry, but the interval we are currently |
| 117 | # considering contains other syscalls that we want to reject. So |
| 118 | # instead of an internal node, create a leaf node with an equality |
| 119 | # comparison. |
| 120 | assert lower_bound < upper_bound |
| 121 | if entries[0].filter: |
| 122 | entries[0].filter.accept(visitor) |
| 123 | return bpf.SyscallEntry( |
| 124 | entries[0].number, |
| 125 | entries[0].filter, |
| 126 | reject_action, |
| 127 | op=bpf.BPF_JEQ) |
| 128 | return bpf.SyscallEntry( |
| 129 | entries[0].number, |
| 130 | accept_action, |
| 131 | reject_action, |
| 132 | op=bpf.BPF_JEQ) |
| 133 | |
| 134 | # Find the midpoint that minimizes the difference between accumulated |
| 135 | # costs in the left and right subtrees. |
| 136 | previous_accumulated = entries[0].accumulated - entries[0].frequency |
| 137 | last_accumulated = entries[-1].accumulated - previous_accumulated |
| 138 | best = (1e99, -1) |
| 139 | for i, entry in enumerate(entries): |
| 140 | if not i: |
| 141 | continue |
| 142 | left_accumulated = entry.accumulated - previous_accumulated |
| 143 | right_accumulated = last_accumulated - left_accumulated |
| 144 | best = min(best, (abs(left_accumulated - right_accumulated), i)) |
| 145 | midpoint = best[1] |
| 146 | assert midpoint >= 1, best |
| 147 | |
| 148 | # Now we build the right and left subtrees independently. If any of the |
| 149 | # subtrees consist of a single entry _and_ the bounds are tight around |
| 150 | # that entry (that is, the bounds contain _only_ the syscall we are |
| 151 | # going to consider), we can avoid emitting a leaf node and instead |
| 152 | # have the comparison jump directly into the action that would be taken |
| 153 | # by the entry. |
| 154 | if entries[midpoint].number == upper_bound: |
| 155 | if entries[midpoint].filter: |
| 156 | entries[midpoint].filter.accept(visitor) |
| 157 | right_subtree = entries[midpoint].filter |
| 158 | else: |
| 159 | right_subtree = accept_action |
| 160 | else: |
| 161 | right_subtree = _generate_syscall_bst( |
| 162 | entries[midpoint:], entries[midpoint].number, upper_bound) |
| 163 | |
| 164 | if lower_bound == entries[midpoint].number - 1: |
| 165 | assert entries[midpoint - 1].number == lower_bound |
| 166 | if entries[midpoint - 1].filter: |
| 167 | entries[midpoint - 1].filter.accept(visitor) |
| 168 | left_subtree = entries[midpoint - 1].filter |
| 169 | else: |
| 170 | left_subtree = accept_action |
| 171 | else: |
| 172 | left_subtree = _generate_syscall_bst( |
| 173 | entries[:midpoint], lower_bound, entries[midpoint].number - 1) |
| 174 | |
| 175 | # Finally, now that both subtrees have been generated, we can create |
| 176 | # the internal node of the binary search tree. |
| 177 | return bpf.SyscallEntry( |
| 178 | entries[midpoint].number, |
| 179 | right_subtree, |
| 180 | left_subtree, |
| 181 | op=bpf.BPF_JGE) |
| 182 | |
| 183 | return _generate_syscall_bst(entries) |
| 184 | |
| 185 | |
Luis Hector Chavez | 05392b8 | 2018-10-28 21:40:10 -0700 | [diff] [blame] | 186 | class PolicyCompiler: |
| 187 | """A parser for the Minijail seccomp policy file format.""" |
| 188 | |
| 189 | def __init__(self, arch): |
| 190 | self._arch = arch |
| 191 | |
Luis Hector Chavez | 7a21ffe | 2018-12-05 16:54:30 -0800 | [diff] [blame^] | 192 | def compile_file(self, |
| 193 | policy_filename, |
| 194 | *, |
| 195 | optimization_strategy, |
| 196 | kill_action, |
| 197 | include_depth_limit=10): |
| 198 | """Return a compiled BPF program from the provided policy file.""" |
| 199 | policy_parser = parser.PolicyParser( |
| 200 | self._arch, |
| 201 | kill_action=kill_action, |
| 202 | include_depth_limit=include_depth_limit) |
| 203 | parsed_policy = policy_parser.parse_file(policy_filename) |
| 204 | entries = [ |
| 205 | self.compile_filter_statement( |
| 206 | filter_statement, kill_action=kill_action) |
| 207 | for filter_statement in parsed_policy.filter_statements |
| 208 | ] |
| 209 | |
| 210 | visitor = bpf.FlatteningVisitor( |
| 211 | arch=self._arch, kill_action=kill_action) |
| 212 | accept_action = bpf.Allow() |
| 213 | reject_action = parsed_policy.default_action |
| 214 | if entries: |
| 215 | if optimization_strategy == OptimizationStrategy.BST: |
| 216 | next_action = _compile_entries_bst(entries, accept_action, |
| 217 | reject_action, visitor) |
| 218 | else: |
| 219 | next_action = _compile_entries_linear(entries, accept_action, |
| 220 | reject_action, visitor) |
| 221 | reject_action.accept(visitor) |
| 222 | accept_action.accept(visitor) |
| 223 | bpf.ValidateArch(next_action).accept(visitor) |
| 224 | else: |
| 225 | reject_action.accept(visitor) |
| 226 | bpf.ValidateArch(reject_action).accept(visitor) |
| 227 | return visitor.result |
| 228 | |
Luis Hector Chavez | 05392b8 | 2018-10-28 21:40:10 -0700 | [diff] [blame] | 229 | def compile_filter_statement(self, filter_statement, *, kill_action): |
| 230 | """Compile one parser.FilterStatement into BPF.""" |
| 231 | policy_entry = SyscallPolicyEntry(filter_statement.syscall.name, |
| 232 | filter_statement.syscall.number, |
| 233 | filter_statement.frequency) |
| 234 | # In each step of the way, the false action is the one that is taken if |
| 235 | # the immediate boolean condition does not match. This means that the |
| 236 | # false action taken here is the one that applies if the whole |
| 237 | # expression fails to match. |
| 238 | false_action = filter_statement.filters[-1].action |
| 239 | if false_action == bpf.Allow(): |
| 240 | return policy_entry |
| 241 | # We then traverse the list of filters backwards since we want |
| 242 | # the root of the DAG to be the very first boolean operation in |
| 243 | # the filter chain. |
| 244 | for filt in filter_statement.filters[:-1][::-1]: |
| 245 | for disjunction in filt.expression: |
| 246 | # This is the jump target of the very last comparison in the |
| 247 | # conjunction. Given that any conjunction that succeeds should |
| 248 | # make the whole expression succeed, make the very last |
| 249 | # comparison jump to the accept action if it succeeds. |
| 250 | true_action = filt.action |
| 251 | for atom in disjunction: |
| 252 | block = bpf.Atom(atom.argument_index, atom.op, atom.value, |
| 253 | true_action, false_action) |
| 254 | true_action = block |
| 255 | false_action = true_action |
| 256 | policy_filter = false_action |
| 257 | |
| 258 | # Lower all Atoms into WideAtoms. |
| 259 | lowering_visitor = bpf.LoweringVisitor(arch=self._arch) |
| 260 | policy_filter = lowering_visitor.process(policy_filter) |
| 261 | |
| 262 | # Flatten the IR DAG into a single BasicBlock. |
| 263 | flattening_visitor = bpf.FlatteningVisitor( |
| 264 | arch=self._arch, kill_action=kill_action) |
| 265 | policy_filter.accept(flattening_visitor) |
| 266 | policy_entry.filter = flattening_visitor.result |
| 267 | return policy_entry |