blob: 73053d2a0a36cbb28e6af257834890f56c80ed0c [file] [log] [blame]
Luis Hector Chavez05392b82018-10-28 21:40:10 -07001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A BPF compiler for the Minijail policy file."""
18
19from __future__ import print_function
20
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -080021import enum
22
Luis Hector Chavez05392b82018-10-28 21:40:10 -070023import bpf
24import parser # pylint: disable=wrong-import-order
25
26
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -080027class OptimizationStrategy(enum.Enum):
28 """The available optimization strategies."""
29
30 # Generate a linear chain of syscall number checks. Works best for policies
31 # with very few syscalls.
32 LINEAR = 'linear'
33
34 # Generate a binary search tree for the syscalls. Works best for policies
35 # with a lot of syscalls, where no one syscall dominates.
36 BST = 'bst'
37
38 def __str__(self):
39 return self.value
40
41
Luis Hector Chavez05392b82018-10-28 21:40:10 -070042class SyscallPolicyEntry:
43 """The parsed version of a seccomp policy line."""
44
45 def __init__(self, name, number, frequency):
46 self.name = name
47 self.number = number
48 self.frequency = frequency
49 self.accumulated = 0
50 self.filter = None
51
52 def __repr__(self):
53 return ('SyscallPolicyEntry<name: %s, number: %d, '
54 'frequency: %d, filter: %r>') % (self.name, self.number,
55 self.frequency,
56 self.filter.instructions
57 if self.filter else None)
58
59 def simulate(self, arch, syscall_number, *args):
60 """Simulate the policy with the given arguments."""
61 if not self.filter:
62 return (0, 'ALLOW')
63 return bpf.simulate(self.filter.instructions, arch, syscall_number,
64 *args)
65
66
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -070067class SyscallPolicyRange:
68 """A contiguous range of SyscallPolicyEntries that have the same action."""
69
70 def __init__(self, *entries):
71 self.numbers = (entries[0].number, entries[-1].number + 1)
72 self.frequency = sum(e.frequency for e in entries)
73 self.accumulated = 0
74 self.filter = entries[0].filter
75
76 def __repr__(self):
77 return 'SyscallPolicyRange<numbers: %r, frequency: %d, filter: %r>' % (
78 self.numbers, self.frequency, self.filter.instructions
79 if self.filter else None)
80
81 def simulate(self, arch, syscall_number, *args):
82 """Simulate the policy with the given arguments."""
83 if not self.filter:
84 return (0, 'ALLOW')
85 return self.filter.simulate(arch, syscall_number, *args)
86
87
88def _convert_to_ranges(entries):
89 entries = list(sorted(entries, key=lambda r: r.number))
90 lower = 0
91 while lower < len(entries):
92 upper = lower + 1
93 while upper < len(entries):
94 if entries[upper - 1].filter != entries[upper].filter:
95 break
96 if entries[upper - 1].number + 1 != entries[upper].number:
97 break
98 upper += 1
99 yield SyscallPolicyRange(*entries[lower:upper])
100 lower = upper
101
102
103def _compile_single_range(entry,
104 accept_action,
105 reject_action,
106 visitor,
107 lower_bound=0,
108 upper_bound=1e99):
109 action = accept_action
110 if entry.filter:
111 entry.filter.accept(visitor)
112 action = entry.filter
113 if entry.numbers[1] - entry.numbers[0] == 1:
114 # Single syscall.
115 # Accept if |X == nr|.
116 return bpf.SyscallEntry(
117 entry.numbers[0], action, reject_action, op=bpf.BPF_JEQ)
118 elif entry.numbers[0] == lower_bound:
119 # Syscall range aligned with the lower bound.
120 # Accept if |X < nr[1]|.
121 return bpf.SyscallEntry(
122 entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)
123 elif entry.numbers[1] == upper_bound:
124 # Syscall range aligned with the upper bound.
125 # Accept if |X >= nr[0]|.
126 return bpf.SyscallEntry(
127 entry.numbers[0], action, reject_action, op=bpf.BPF_JGE)
128 # Syscall range in the middle.
129 # Accept if |nr[0] <= X < nr[1]|.
130 upper_entry = bpf.SyscallEntry(
131 entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)
132 return bpf.SyscallEntry(
133 entry.numbers[0], upper_entry, reject_action, op=bpf.BPF_JGE)
134
135
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800136def _compile_entries_linear(entries, accept_action, reject_action, visitor):
137 # Compiles the list of entries into a simple linear list of comparisons. In
138 # order to make the generated code a bit more efficient, we sort the
139 # entries by frequency, so that the most frequently-called syscalls appear
140 # earlier in the chain.
141 next_action = reject_action
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700142 ranges = sorted(_convert_to_ranges(entries), key=lambda r: -r.frequency)
143 for entry in ranges[::-1]:
144 next_action = _compile_single_range(entry, accept_action, next_action,
145 visitor)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800146 return next_action
147
148
149def _compile_entries_bst(entries, accept_action, reject_action, visitor):
150 # Instead of generating a linear list of comparisons, this method generates
151 # a binary search tree.
152 #
153 # Even though we are going to perform a binary search over the syscall
154 # number, we would still like to rotate some of the internal nodes of the
155 # binary search tree so that more frequently-used syscalls can be accessed
156 # more cheaply (i.e. fewer internal nodes need to be traversed to reach
157 # them).
158 #
159 # The overall idea then is to, at any step, instead of naively partitioning
160 # the list of syscalls by the midpoint of the interval, we choose a
161 # midpoint that minimizes the difference of the sum of all frequencies
162 # between the left and right subtrees. For that, we need to sort the
163 # entries by syscall number and keep track of the accumulated frequency of
164 # all entries prior to the current one so that we can compue the midpoint
165 # efficiently.
166 #
167 # TODO(lhchavez): There is one further possible optimization, which is to
168 # hoist any syscalls that are more frequent than all other syscalls in the
169 # BST combined into a linear chain before entering the BST.
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700170 ranges = list(_convert_to_ranges(entries))
171
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800172 accumulated = 0
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700173 for entry in ranges:
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800174 accumulated += entry.frequency
175 entry.accumulated = accumulated
176
177 # Recursively create the internal nodes.
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700178 def _generate_syscall_bst(ranges, lower_bound=0, upper_bound=2**64 - 1):
179 assert ranges
180 if len(ranges) == 1:
181 # This is a single syscall entry range, but the interval we are
182 # currently considering contains other syscalls that we want to
183 # reject. So instead of an internal node, create one or more leaf
184 # nodes that check the range.
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800185 assert lower_bound < upper_bound
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700186 return _compile_single_range(ranges[0], accept_action,
187 reject_action, visitor, lower_bound,
188 upper_bound)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800189
190 # Find the midpoint that minimizes the difference between accumulated
191 # costs in the left and right subtrees.
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700192 previous_accumulated = ranges[0].accumulated - ranges[0].frequency
193 last_accumulated = ranges[-1].accumulated - previous_accumulated
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800194 best = (1e99, -1)
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700195 for i, entry in enumerate(ranges):
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800196 if not i:
197 continue
198 left_accumulated = entry.accumulated - previous_accumulated
199 right_accumulated = last_accumulated - left_accumulated
200 best = min(best, (abs(left_accumulated - right_accumulated), i))
201 midpoint = best[1]
202 assert midpoint >= 1, best
203
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700204 cutoff_bound = ranges[midpoint].numbers[0]
205
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800206 # Now we build the right and left subtrees independently. If any of the
207 # subtrees consist of a single entry _and_ the bounds are tight around
208 # that entry (that is, the bounds contain _only_ the syscall we are
209 # going to consider), we can avoid emitting a leaf node and instead
210 # have the comparison jump directly into the action that would be taken
211 # by the entry.
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700212 if (cutoff_bound, upper_bound) == ranges[midpoint].numbers:
213 if ranges[midpoint].filter:
214 ranges[midpoint].filter.accept(visitor)
215 right_subtree = ranges[midpoint].filter
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800216 else:
217 right_subtree = accept_action
218 else:
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700219 right_subtree = _generate_syscall_bst(ranges[midpoint:],
220 cutoff_bound, upper_bound)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800221
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700222 if (lower_bound, cutoff_bound) == ranges[midpoint - 1].numbers:
223 if ranges[midpoint - 1].filter:
224 ranges[midpoint - 1].filter.accept(visitor)
225 left_subtree = ranges[midpoint - 1].filter
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800226 else:
227 left_subtree = accept_action
228 else:
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700229 left_subtree = _generate_syscall_bst(ranges[:midpoint],
230 lower_bound, cutoff_bound)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800231
232 # Finally, now that both subtrees have been generated, we can create
233 # the internal node of the binary search tree.
234 return bpf.SyscallEntry(
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700235 cutoff_bound, right_subtree, left_subtree, op=bpf.BPF_JGE)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800236
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700237 return _generate_syscall_bst(ranges)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800238
239
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700240class PolicyCompiler:
241 """A parser for the Minijail seccomp policy file format."""
242
243 def __init__(self, arch):
244 self._arch = arch
245
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800246 def compile_file(self,
247 policy_filename,
248 *,
249 optimization_strategy,
250 kill_action,
251 include_depth_limit=10):
252 """Return a compiled BPF program from the provided policy file."""
253 policy_parser = parser.PolicyParser(
254 self._arch,
255 kill_action=kill_action,
256 include_depth_limit=include_depth_limit)
257 parsed_policy = policy_parser.parse_file(policy_filename)
258 entries = [
259 self.compile_filter_statement(
260 filter_statement, kill_action=kill_action)
261 for filter_statement in parsed_policy.filter_statements
262 ]
263
264 visitor = bpf.FlatteningVisitor(
265 arch=self._arch, kill_action=kill_action)
266 accept_action = bpf.Allow()
267 reject_action = parsed_policy.default_action
268 if entries:
269 if optimization_strategy == OptimizationStrategy.BST:
270 next_action = _compile_entries_bst(entries, accept_action,
271 reject_action, visitor)
272 else:
273 next_action = _compile_entries_linear(entries, accept_action,
274 reject_action, visitor)
275 reject_action.accept(visitor)
276 accept_action.accept(visitor)
277 bpf.ValidateArch(next_action).accept(visitor)
278 else:
279 reject_action.accept(visitor)
280 bpf.ValidateArch(reject_action).accept(visitor)
281 return visitor.result
282
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700283 def compile_filter_statement(self, filter_statement, *, kill_action):
284 """Compile one parser.FilterStatement into BPF."""
285 policy_entry = SyscallPolicyEntry(filter_statement.syscall.name,
286 filter_statement.syscall.number,
287 filter_statement.frequency)
288 # In each step of the way, the false action is the one that is taken if
289 # the immediate boolean condition does not match. This means that the
290 # false action taken here is the one that applies if the whole
291 # expression fails to match.
292 false_action = filter_statement.filters[-1].action
293 if false_action == bpf.Allow():
294 return policy_entry
295 # We then traverse the list of filters backwards since we want
296 # the root of the DAG to be the very first boolean operation in
297 # the filter chain.
298 for filt in filter_statement.filters[:-1][::-1]:
299 for disjunction in filt.expression:
300 # This is the jump target of the very last comparison in the
301 # conjunction. Given that any conjunction that succeeds should
302 # make the whole expression succeed, make the very last
303 # comparison jump to the accept action if it succeeds.
304 true_action = filt.action
305 for atom in disjunction:
306 block = bpf.Atom(atom.argument_index, atom.op, atom.value,
307 true_action, false_action)
308 true_action = block
309 false_action = true_action
310 policy_filter = false_action
311
312 # Lower all Atoms into WideAtoms.
313 lowering_visitor = bpf.LoweringVisitor(arch=self._arch)
314 policy_filter = lowering_visitor.process(policy_filter)
315
316 # Flatten the IR DAG into a single BasicBlock.
317 flattening_visitor = bpf.FlatteningVisitor(
318 arch=self._arch, kill_action=kill_action)
319 policy_filter.accept(flattening_visitor)
320 policy_entry.filter = flattening_visitor.result
321 return policy_entry