blob: 9895df86973b54c3556c97aa45186f0d85b2a5bc [file] [log] [blame]
Luis Hector Chavez05392b82018-10-28 21:40:10 -07001#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""A BPF compiler for the Minijail policy file."""
18
19from __future__ import print_function
20
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -080021import enum
22
Luis Hector Chavez05392b82018-10-28 21:40:10 -070023import bpf
24import parser # pylint: disable=wrong-import-order
25
26
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -080027class OptimizationStrategy(enum.Enum):
28 """The available optimization strategies."""
29
30 # Generate a linear chain of syscall number checks. Works best for policies
31 # with very few syscalls.
32 LINEAR = 'linear'
33
34 # Generate a binary search tree for the syscalls. Works best for policies
35 # with a lot of syscalls, where no one syscall dominates.
36 BST = 'bst'
37
38 def __str__(self):
39 return self.value
40
41
Luis Hector Chavez05392b82018-10-28 21:40:10 -070042class SyscallPolicyEntry:
43 """The parsed version of a seccomp policy line."""
44
45 def __init__(self, name, number, frequency):
46 self.name = name
47 self.number = number
48 self.frequency = frequency
49 self.accumulated = 0
50 self.filter = None
51
52 def __repr__(self):
53 return ('SyscallPolicyEntry<name: %s, number: %d, '
54 'frequency: %d, filter: %r>') % (self.name, self.number,
55 self.frequency,
56 self.filter.instructions
57 if self.filter else None)
58
59 def simulate(self, arch, syscall_number, *args):
60 """Simulate the policy with the given arguments."""
61 if not self.filter:
62 return (0, 'ALLOW')
63 return bpf.simulate(self.filter.instructions, arch, syscall_number,
64 *args)
65
66
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -070067class SyscallPolicyRange:
68 """A contiguous range of SyscallPolicyEntries that have the same action."""
69
70 def __init__(self, *entries):
71 self.numbers = (entries[0].number, entries[-1].number + 1)
72 self.frequency = sum(e.frequency for e in entries)
73 self.accumulated = 0
74 self.filter = entries[0].filter
75
76 def __repr__(self):
77 return 'SyscallPolicyRange<numbers: %r, frequency: %d, filter: %r>' % (
78 self.numbers, self.frequency, self.filter.instructions
79 if self.filter else None)
80
81 def simulate(self, arch, syscall_number, *args):
82 """Simulate the policy with the given arguments."""
83 if not self.filter:
84 return (0, 'ALLOW')
85 return self.filter.simulate(arch, syscall_number, *args)
86
87
88def _convert_to_ranges(entries):
89 entries = list(sorted(entries, key=lambda r: r.number))
90 lower = 0
91 while lower < len(entries):
92 upper = lower + 1
93 while upper < len(entries):
94 if entries[upper - 1].filter != entries[upper].filter:
95 break
96 if entries[upper - 1].number + 1 != entries[upper].number:
97 break
98 upper += 1
99 yield SyscallPolicyRange(*entries[lower:upper])
100 lower = upper
101
102
103def _compile_single_range(entry,
104 accept_action,
105 reject_action,
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700106 lower_bound=0,
107 upper_bound=1e99):
108 action = accept_action
109 if entry.filter:
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700110 action = entry.filter
111 if entry.numbers[1] - entry.numbers[0] == 1:
112 # Single syscall.
113 # Accept if |X == nr|.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700114 return (1,
115 bpf.SyscallEntry(
116 entry.numbers[0], action, reject_action, op=bpf.BPF_JEQ))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700117 elif entry.numbers[0] == lower_bound:
118 # Syscall range aligned with the lower bound.
119 # Accept if |X < nr[1]|.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700120 return (1,
121 bpf.SyscallEntry(
122 entry.numbers[1], reject_action, action, op=bpf.BPF_JGE))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700123 elif entry.numbers[1] == upper_bound:
124 # Syscall range aligned with the upper bound.
125 # Accept if |X >= nr[0]|.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700126 return (1,
127 bpf.SyscallEntry(
128 entry.numbers[0], action, reject_action, op=bpf.BPF_JGE))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700129 # Syscall range in the middle.
130 # Accept if |nr[0] <= X < nr[1]|.
131 upper_entry = bpf.SyscallEntry(
132 entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700133 return (2,
134 bpf.SyscallEntry(
135 entry.numbers[0], upper_entry, reject_action, op=bpf.BPF_JGE))
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700136
137
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700138def _compile_ranges_linear(ranges, accept_action, reject_action):
139 # Compiles the list of ranges into a simple linear list of comparisons. In
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800140 # order to make the generated code a bit more efficient, we sort the
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700141 # ranges by frequency, so that the most frequently-called syscalls appear
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800142 # earlier in the chain.
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700143 cost = 0
144 accumulated_frequencies = 0
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800145 next_action = reject_action
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700146 for entry in sorted(ranges, key=lambda r: r.frequency):
147 current_cost, next_action = _compile_single_range(
148 entry, accept_action, next_action)
149 accumulated_frequencies += entry.frequency
150 cost += accumulated_frequencies * current_cost
151 return (cost, next_action)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800152
153
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700154def _compile_entries_linear(entries, accept_action, reject_action):
155 return _compile_ranges_linear(
156 _convert_to_ranges(entries), accept_action, reject_action)[1]
157
158
159def _compile_entries_bst(entries, accept_action, reject_action):
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800160 # Instead of generating a linear list of comparisons, this method generates
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700161 # a binary search tree, where some of the leaves can be linear chains of
162 # comparisons.
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800163 #
164 # Even though we are going to perform a binary search over the syscall
165 # number, we would still like to rotate some of the internal nodes of the
166 # binary search tree so that more frequently-used syscalls can be accessed
167 # more cheaply (i.e. fewer internal nodes need to be traversed to reach
168 # them).
169 #
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700170 # This uses Dynamic Programming to generate all possible BSTs efficiently
171 # (in O(n^3)) so that we can get the absolute minimum-cost tree that matches
172 # all syscall entries. It does so by considering all of the O(n^2) possible
173 # sub-intervals, and for each one of those try all of the O(n) partitions of
174 # that sub-interval. At each step, it considers putting the remaining
175 # entries in a linear comparison chain as well as another BST, and chooses
176 # the option that minimizes the total overall cost.
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800177 #
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700178 # Between every pair of non-contiguous allowed syscalls, there are two
179 # locally optimal options as to where to set the partition for the
180 # subsequent ranges: aligned to the end of the left subrange or to the
181 # beginning of the right subrange. The fact that these two options have
182 # slightly different costs, combined with the possibility of a subtree to
183 # use the linear chain strategy (which has a completely different cost
184 # model), causes the target cost function that we are trying to optimize to
185 # not be unimodal / convex. This unfortunately means that more clever
186 # techniques like using ternary search (which would reduce the overall
187 # complexity to O(n^2 log n)) do not work in all cases.
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700188 ranges = list(_convert_to_ranges(entries))
189
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800190 accumulated = 0
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700191 for entry in ranges:
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800192 accumulated += entry.frequency
193 entry.accumulated = accumulated
194
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700195 # Memoization cache to build the DP table top-down, which is easier to
196 # understand.
197 memoized_costs = {}
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800198
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700199 def _generate_syscall_bst(ranges, indices, bounds=(0, 2**64 - 1)):
200 assert bounds[0] <= ranges[indices[0]].numbers[0], (indices, bounds)
201 assert ranges[indices[1] - 1].numbers[1] <= bounds[1], (indices,
202 bounds)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800203
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700204 if bounds in memoized_costs:
205 return memoized_costs[bounds]
206 if indices[1] - indices[0] == 1:
207 if bounds == ranges[indices[0]].numbers:
208 # If bounds are tight around the syscall, it costs nothing.
209 memoized_costs[bounds] = (0, ranges[indices[0]].filter
210 or accept_action)
211 return memoized_costs[bounds]
212 result = _compile_single_range(ranges[indices[0]], accept_action,
213 reject_action)
214 memoized_costs[bounds] = (result[0] * ranges[indices[0]].frequency,
215 result[1])
216 return memoized_costs[bounds]
Luis Hector Chavezb21da7a2018-11-01 16:50:36 -0700217
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700218 # Try the linear model first and use that as the best estimate so far.
219 best_cost = _compile_ranges_linear(ranges[slice(*indices)],
220 accept_action, reject_action)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800221
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700222 # Now recursively go through all possible partitions of the interval
223 # currently being considered.
224 previous_accumulated = ranges[indices[0]].accumulated - ranges[indices[0]].frequency
225 bst_comparison_cost = (
226 ranges[indices[1] - 1].accumulated - previous_accumulated)
227 for i, entry in enumerate(ranges[slice(*indices)]):
228 candidates = [entry.numbers[0]]
229 if i:
230 candidates.append(ranges[i - 1 + indices[0]].numbers[1])
231 for cutoff_bound in candidates:
232 if not bounds[0] < cutoff_bound < bounds[1]:
233 continue
234 if not indices[0] < i + indices[0] < indices[1]:
235 continue
236 left_subtree = _generate_syscall_bst(
237 ranges, (indices[0], i + indices[0]),
238 (bounds[0], cutoff_bound))
239 right_subtree = _generate_syscall_bst(
240 ranges, (i + indices[0], indices[1]),
241 (cutoff_bound, bounds[1]))
242 best_cost = min(
243 best_cost,
244 (bst_comparison_cost + left_subtree[0] + right_subtree[0],
245 bpf.SyscallEntry(
246 cutoff_bound,
247 right_subtree[1],
248 left_subtree[1],
249 op=bpf.BPF_JGE)))
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800250
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700251 memoized_costs[bounds] = best_cost
252 return memoized_costs[bounds]
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800253
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700254 return _generate_syscall_bst(ranges, (0, len(ranges)))[1]
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800255
256
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700257class PolicyCompiler:
258 """A parser for the Minijail seccomp policy file format."""
259
260 def __init__(self, arch):
261 self._arch = arch
262
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800263 def compile_file(self,
264 policy_filename,
265 *,
266 optimization_strategy,
267 kill_action,
268 include_depth_limit=10):
269 """Return a compiled BPF program from the provided policy file."""
270 policy_parser = parser.PolicyParser(
271 self._arch,
272 kill_action=kill_action,
273 include_depth_limit=include_depth_limit)
274 parsed_policy = policy_parser.parse_file(policy_filename)
275 entries = [
276 self.compile_filter_statement(
277 filter_statement, kill_action=kill_action)
278 for filter_statement in parsed_policy.filter_statements
279 ]
280
281 visitor = bpf.FlatteningVisitor(
282 arch=self._arch, kill_action=kill_action)
283 accept_action = bpf.Allow()
284 reject_action = parsed_policy.default_action
285 if entries:
286 if optimization_strategy == OptimizationStrategy.BST:
287 next_action = _compile_entries_bst(entries, accept_action,
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700288 reject_action)
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800289 else:
290 next_action = _compile_entries_linear(entries, accept_action,
Luis Hector Chaveza54812b2018-11-01 20:02:22 -0700291 reject_action)
292 next_action.accept(bpf.ArgFilterForwardingVisitor(visitor))
Luis Hector Chavez7a21ffe2018-12-05 16:54:30 -0800293 reject_action.accept(visitor)
294 accept_action.accept(visitor)
295 bpf.ValidateArch(next_action).accept(visitor)
296 else:
297 reject_action.accept(visitor)
298 bpf.ValidateArch(reject_action).accept(visitor)
299 return visitor.result
300
Luis Hector Chavez05392b82018-10-28 21:40:10 -0700301 def compile_filter_statement(self, filter_statement, *, kill_action):
302 """Compile one parser.FilterStatement into BPF."""
303 policy_entry = SyscallPolicyEntry(filter_statement.syscall.name,
304 filter_statement.syscall.number,
305 filter_statement.frequency)
306 # In each step of the way, the false action is the one that is taken if
307 # the immediate boolean condition does not match. This means that the
308 # false action taken here is the one that applies if the whole
309 # expression fails to match.
310 false_action = filter_statement.filters[-1].action
311 if false_action == bpf.Allow():
312 return policy_entry
313 # We then traverse the list of filters backwards since we want
314 # the root of the DAG to be the very first boolean operation in
315 # the filter chain.
316 for filt in filter_statement.filters[:-1][::-1]:
317 for disjunction in filt.expression:
318 # This is the jump target of the very last comparison in the
319 # conjunction. Given that any conjunction that succeeds should
320 # make the whole expression succeed, make the very last
321 # comparison jump to the accept action if it succeeds.
322 true_action = filt.action
323 for atom in disjunction:
324 block = bpf.Atom(atom.argument_index, atom.op, atom.value,
325 true_action, false_action)
326 true_action = block
327 false_action = true_action
328 policy_filter = false_action
329
330 # Lower all Atoms into WideAtoms.
331 lowering_visitor = bpf.LoweringVisitor(arch=self._arch)
332 policy_filter = lowering_visitor.process(policy_filter)
333
334 # Flatten the IR DAG into a single BasicBlock.
335 flattening_visitor = bpf.FlatteningVisitor(
336 arch=self._arch, kill_action=kill_action)
337 policy_filter.accept(flattening_visitor)
338 policy_entry.filter = flattening_visitor.result
339 return policy_entry