Blame - tools/compiler.py - android.googlesource.com/platform/external/minijail

2018-10-28 21:40:10 -0700

[diff] [blame]

1

#!/usr/bin/env python3

2

# -*- coding: utf-8 -*-

#

#

# Licensed under the Apache License, Version 2.0 (the "License");

7

# you may not use this file except in compliance with the License.

8

# You may obtain a copy of the License at

9

#

10

# http://www.apache.org/licenses/LICENSE-2.0

11

#

12

# Unless required by applicable law or agreed to in writing, software

13

# distributed under the License is distributed on an "AS IS" BASIS,

14

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

15

# See the License for the specific language governing permissions and

16

# limitations under the License.

17

"""A BPF compiler for the Minijail policy file."""

18

19

from __future__ import print_function

20

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

21

import enum

22

Luis Hector Chavez

2018-10-28 21:40:10 -0700

[diff] [blame]

23

import bpf

24

import parser # pylint: disable=wrong-import-order

25

26

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

27

class OptimizationStrategy(enum.Enum):

28

"""The available optimization strategies."""

29

30

# Generate a linear chain of syscall number checks. Works best for policies

31

# with very few syscalls.

32

LINEAR = 'linear'

33

34

# Generate a binary search tree for the syscalls. Works best for policies

35

# with a lot of syscalls, where no one syscall dominates.

BST = 'bst'

def __str__(self):

return self.value

Luis Hector Chavez

2018-10-28 21:40:10 -0700

[diff] [blame]

42

class SyscallPolicyEntry:

43

"""The parsed version of a seccomp policy line."""

44

45

def __init__(self, name, number, frequency):

46

self.name = name

47

self.number = number

48

self.frequency = frequency

self.accumulated = 0

self.filter = None

def __repr__(self):

return ('SyscallPolicyEntry<name: %s, number: %d, '

54

'frequency: %d, filter: %r>') % (self.name, self.number,

55

self.frequency,

56

self.filter.instructions

57

if self.filter else None)

58

59

def simulate(self, arch, syscall_number, *args):

60

"""Simulate the policy with the given arguments."""

61

if not self.filter:

62

return (0, 'ALLOW')

63

return bpf.simulate(self.filter.instructions, arch, syscall_number,

*args)

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

67

class SyscallPolicyRange:

68

"""A contiguous range of SyscallPolicyEntries that have the same action."""

69

70

def __init__(self, *entries):

71

self.numbers = (entries[0].number, entries[-1].number + 1)

72

self.frequency = sum(e.frequency for e in entries)

73

self.accumulated = 0

74

self.filter = entries[0].filter

75

76

def __repr__(self):

77

return 'SyscallPolicyRange<numbers: %r, frequency: %d, filter: %r>' % (

78

self.numbers, self.frequency, self.filter.instructions

79

if self.filter else None)

80

81

def simulate(self, arch, syscall_number, *args):

82

"""Simulate the policy with the given arguments."""

83

if not self.filter:

84

return (0, 'ALLOW')

85

return self.filter.simulate(arch, syscall_number, *args)

86

87

88

def _convert_to_ranges(entries):

89

entries = list(sorted(entries, key=lambda r: r.number))

90

lower = 0

91

while lower < len(entries):

92

upper = lower + 1

93

while upper < len(entries):

94

if entries[upper - 1].filter != entries[upper].filter:

95

break

96

if entries[upper - 1].number + 1 != entries[upper].number:

97

break

98

upper += 1

99

yield SyscallPolicyRange(*entries[lower:upper])

lower = upper

def _compile_single_range(entry,

accept_action,

reject_action,

visitor,

lower_bound=0,

upper_bound=1e99):

action = accept_action

110

if entry.filter:

111

entry.filter.accept(visitor)

112

action = entry.filter

113

if entry.numbers[1] - entry.numbers[0] == 1:

114

# Single syscall.

115

# Accept if |X == nr|.

116

return bpf.SyscallEntry(

117

entry.numbers[0], action, reject_action, op=bpf.BPF_JEQ)

118

elif entry.numbers[0] == lower_bound:

119

# Syscall range aligned with the lower bound.

120

# Accept if |X < nr[1]|.

121

return bpf.SyscallEntry(

122

entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)

123

elif entry.numbers[1] == upper_bound:

124

# Syscall range aligned with the upper bound.

125

# Accept if |X >= nr[0]|.

126

return bpf.SyscallEntry(

127

entry.numbers[0], action, reject_action, op=bpf.BPF_JGE)

128

# Syscall range in the middle.

129

# Accept if |nr[0] <= X < nr[1]|.

130

upper_entry = bpf.SyscallEntry(

131

entry.numbers[1], reject_action, action, op=bpf.BPF_JGE)

132

return bpf.SyscallEntry(

133

entry.numbers[0], upper_entry, reject_action, op=bpf.BPF_JGE)

134

135

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

136

def _compile_entries_linear(entries, accept_action, reject_action, visitor):

137

# Compiles the list of entries into a simple linear list of comparisons. In

138

# order to make the generated code a bit more efficient, we sort the

139

# entries by frequency, so that the most frequently-called syscalls appear

140

# earlier in the chain.

141

next_action = reject_action

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

142

ranges = sorted(_convert_to_ranges(entries), key=lambda r: -r.frequency)

143

for entry in ranges[::-1]:

144

next_action = _compile_single_range(entry, accept_action, next_action,

145

visitor)

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

return next_action

def _compile_entries_bst(entries, accept_action, reject_action, visitor):

150

# Instead of generating a linear list of comparisons, this method generates

151

# a binary search tree.

152

#

153

# Even though we are going to perform a binary search over the syscall

154

# number, we would still like to rotate some of the internal nodes of the

155

# binary search tree so that more frequently-used syscalls can be accessed

156

# more cheaply (i.e. fewer internal nodes need to be traversed to reach

157

# them).

158

#

159

# The overall idea then is to, at any step, instead of naively partitioning

160

# the list of syscalls by the midpoint of the interval, we choose a

161

# midpoint that minimizes the difference of the sum of all frequencies

162

# between the left and right subtrees. For that, we need to sort the

163

# entries by syscall number and keep track of the accumulated frequency of

164

# all entries prior to the current one so that we can compue the midpoint

165

# efficiently.

166

#

167

# TODO(lhchavez): There is one further possible optimization, which is to

168

# hoist any syscalls that are more frequent than all other syscalls in the

169

# BST combined into a linear chain before entering the BST.

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

170

ranges = list(_convert_to_ranges(entries))

171

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

172

accumulated = 0

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

173

for entry in ranges:

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

174

accumulated += entry.frequency

175

entry.accumulated = accumulated

176

177

# Recursively create the internal nodes.

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

178

def _generate_syscall_bst(ranges, lower_bound=0, upper_bound=2**64 - 1):

179

assert ranges

180

if len(ranges) == 1:

181

# This is a single syscall entry range, but the interval we are

182

# currently considering contains other syscalls that we want to

183

# reject. So instead of an internal node, create one or more leaf

184

# nodes that check the range.

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

185

assert lower_bound < upper_bound

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

186

return _compile_single_range(ranges[0], accept_action,

187

reject_action, visitor, lower_bound,

188

upper_bound)

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

189

190

# Find the midpoint that minimizes the difference between accumulated

191

# costs in the left and right subtrees.

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

192

previous_accumulated = ranges[0].accumulated - ranges[0].frequency

193

last_accumulated = ranges[-1].accumulated - previous_accumulated

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

194

best = (1e99, -1)

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

195

for i, entry in enumerate(ranges):

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

196

if not i:

197

continue

198

left_accumulated = entry.accumulated - previous_accumulated

199

right_accumulated = last_accumulated - left_accumulated

200

best = min(best, (abs(left_accumulated - right_accumulated), i))

201

midpoint = best[1]

202

assert midpoint >= 1, best

203

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

204

cutoff_bound = ranges[midpoint].numbers[0]

205

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

206

# Now we build the right and left subtrees independently. If any of the

207

# subtrees consist of a single entry _and_ the bounds are tight around

208

# that entry (that is, the bounds contain _only_ the syscall we are

209

# going to consider), we can avoid emitting a leaf node and instead

210

# have the comparison jump directly into the action that would be taken

211

# by the entry.

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

212

if (cutoff_bound, upper_bound) == ranges[midpoint].numbers:

213

if ranges[midpoint].filter:

214

ranges[midpoint].filter.accept(visitor)

215

right_subtree = ranges[midpoint].filter

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

216

else:

217

right_subtree = accept_action

218

else:

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

219

right_subtree = _generate_syscall_bst(ranges[midpoint:],

220

cutoff_bound, upper_bound)

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

221

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

222

if (lower_bound, cutoff_bound) == ranges[midpoint - 1].numbers:

223

if ranges[midpoint - 1].filter:

224

ranges[midpoint - 1].filter.accept(visitor)

225

left_subtree = ranges[midpoint - 1].filter

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

226

else:

227

left_subtree = accept_action

228

else:

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

229

left_subtree = _generate_syscall_bst(ranges[:midpoint],

230

lower_bound, cutoff_bound)

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

231

232

# Finally, now that both subtrees have been generated, we can create

233

# the internal node of the binary search tree.

234

return bpf.SyscallEntry(

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

235

cutoff_bound, right_subtree, left_subtree, op=bpf.BPF_JGE)

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

236

Luis Hector Chavez

2018-11-01 16:50:36 -0700

[diff] [blame^]

237

return _generate_syscall_bst(ranges)

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

238

239

Luis Hector Chavez

2018-10-28 21:40:10 -0700

[diff] [blame]

240

class PolicyCompiler:

241

"""A parser for the Minijail seccomp policy file format."""

242

243

def __init__(self, arch):

244

self._arch = arch

245

Luis Hector Chavez

2018-12-05 16:54:30 -0800

[diff] [blame]

246

def compile_file(self,

247

policy_filename,

248

*,

249

optimization_strategy,

250

kill_action,

251

include_depth_limit=10):

252

"""Return a compiled BPF program from the provided policy file."""

253

policy_parser = parser.PolicyParser(

254

self._arch,

255

kill_action=kill_action,

256

include_depth_limit=include_depth_limit)

257

parsed_policy = policy_parser.parse_file(policy_filename)

258

entries = [

259

self.compile_filter_statement(

260

filter_statement, kill_action=kill_action)

261

for filter_statement in parsed_policy.filter_statements

262

]

263

264

visitor = bpf.FlatteningVisitor(

265

arch=self._arch, kill_action=kill_action)

266

accept_action = bpf.Allow()

267

reject_action = parsed_policy.default_action

268

if entries:

269

if optimization_strategy == OptimizationStrategy.BST:

270

next_action = _compile_entries_bst(entries, accept_action,

271

reject_action, visitor)

272

else:

273

next_action = _compile_entries_linear(entries, accept_action,

274

reject_action, visitor)

275

reject_action.accept(visitor)

276

accept_action.accept(visitor)

277

bpf.ValidateArch(next_action).accept(visitor)

278

else:

279

reject_action.accept(visitor)

280

bpf.ValidateArch(reject_action).accept(visitor)

281

return visitor.result

282

Luis Hector Chavez