/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 *
 * This file is subject to the terms and conditions of version 2 of the GNU
 * General Public License. See the file COPYING in the main directory of the
 * Linux distribution for more details.
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>

DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);

/**
 * cgroup_bpf_put() - put references of all bpf programs
 * @cgrp: the cgroup to modify
 */
void cgroup_bpf_put(struct cgroup *cgrp)
{
	enum bpf_cgroup_storage_type stype;
	unsigned int type;

	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
		struct list_head *progs = &cgrp->bpf.progs[type];
		struct bpf_prog_list *pl, *tmp;

		list_for_each_entry_safe(pl, tmp, progs, node) {
			list_del(&pl->node);
			bpf_prog_put(pl->prog);
			for_each_cgroup_storage_type(stype) {
				bpf_cgroup_storage_unlink(pl->storage[stype]);
				bpf_cgroup_storage_free(pl->storage[stype]);
			}
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key);
		}
		bpf_prog_array_free(cgrp->bpf.effective[type]);
	}
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct list_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	list_for_each_entry(pl, head, node) {
		if (!pl->prog)
			continue;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum bpf_attach_type type,
				    u32 new_flags)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[type];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[type]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program yields
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum bpf_attach_type type,
				   struct bpf_prog_array __rcu **array)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[type]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			continue;

		list_for_each_entry(pl, &p->bpf.progs[type], node) {
			if (!pl->prog)
				continue;

			progs->items[cnt].prog = pl->prog;
			for_each_cgroup_storage_type(stype)
				progs->items[cnt].cgroup_storage[stype] =
					pl->storage[stype];
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	rcu_assign_pointer(*array, progs);
	return 0;
}

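/* publish a newly computed effective prog array for @type and release
 * the array it replaces
 */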
static void activate_effective_progs(struct cgroup *cgrp,
				     enum bpf_attach_type type,
				     struct bpf_prog_array __rcu *array)
{
	struct bpf_prog_array __rcu *old_array;

	old_array = xchg(&cgrp->bpf.effective[type], array);
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might still be walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array __rcu *arrays[NR] = {};
	int i;

	for (i = 0; i < NR; i++)
		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);
	return -ENOMEM;
}

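/* recompute the effective prog arrays for @cgrp and all of its
 * descendants; the new arrays are activated only if every allocation
 * succeeded, otherwise everything computed so far is freed
 */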
static int update_effective_progs(struct cgroup *cgrp,
				  enum bpf_attach_type type)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		activate_effective_progs(desc, type, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

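/* upper bound on the number of programs attached to one cgroup for a
 * given attach type
 */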
#define BPF_CGROUP_MAX_PROGS 64

/**
 * __cgroup_bpf_attach() - Attach the program to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to attach
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type, u32 flags)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
	enum bpf_cgroup_storage_type stype;
	struct bpf_prog_list *pl;
	bool pl_was_allocated;
	int err;

	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
		/* invalid combination */
		return -EINVAL;

	if (!hierarchy_allows_attach(cgrp, type, flags))
		return -EPERM;

	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
		 */
		return -EPERM;

	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	for_each_cgroup_storage_type(stype) {
		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storage[stype])) {
			storage[stype] = NULL;
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}
	}

	if (flags & BPF_F_ALLOW_MULTI) {
		list_for_each_entry(pl, progs, node) {
			if (pl->prog == prog) {
				/* disallow attaching the same prog twice */
				for_each_cgroup_storage_type(stype)
					bpf_cgroup_storage_free(storage[stype]);
				return -EINVAL;
			}
		}

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}

		pl_was_allocated = true;
		pl->prog = prog;
		for_each_cgroup_storage_type(stype)
			pl->storage[stype] = storage[stype];
		list_add_tail(&pl->node, progs);
	} else {
		if (list_empty(progs)) {
			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
			if (!pl) {
				for_each_cgroup_storage_type(stype)
					bpf_cgroup_storage_free(storage[stype]);
				return -ENOMEM;
			}
			pl_was_allocated = true;
			list_add_tail(&pl->node, progs);
		} else {
			pl = list_first_entry(progs, typeof(*pl), node);
			old_prog = pl->prog;
			for_each_cgroup_storage_type(stype) {
				old_storage[stype] = pl->storage[stype];
				bpf_cgroup_storage_unlink(old_storage[stype]);
			}
			pl_was_allocated = false;
		}
		pl->prog = prog;
		for_each_cgroup_storage_type(stype)
			pl->storage[stype] = storage[stype];
	}

	cgrp->bpf.flags[type] = flags;

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	static_branch_inc(&cgroup_bpf_enabled_key);
	for_each_cgroup_storage_type(stype) {
		if (!old_storage[stype])
			continue;
		bpf_cgroup_storage_free(old_storage[stype]);
	}
	if (old_prog) {
		bpf_prog_put(old_prog);
		static_branch_dec(&cgroup_bpf_enabled_key);
	}
	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_link(storage[stype], cgrp, type);
	return 0;

cleanup:
	/* and cleanup the prog list */
	pl->prog = old_prog;
	for_each_cgroup_storage_type(stype) {
		bpf_cgroup_storage_free(pl->storage[stype]);
		pl->storage[stype] = old_storage[stype];
		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
	}
	if (pl_was_allocated) {
		list_del(&pl->node);
		kfree(pl);
	}
	return err;
}

/**
 * __cgroup_bpf_detach() - Detach the program from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to detach or NULL
 * @type: Type of detach operation
 *
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	enum bpf_cgroup_storage_type stype;
	u32 flags = cgrp->bpf.flags[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_prog_list *pl;
	int err;

	if (flags & BPF_F_ALLOW_MULTI) {
		if (!prog)
			/* to detach MULTI prog the user has to specify valid FD
			 * of the program to be detached
			 */
			return -EINVAL;
	} else {
		if (list_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return -ENOENT;
	}

	if (flags & BPF_F_ALLOW_MULTI) {
		/* find the prog and detach it */
		list_for_each_entry(pl, progs, node) {
			if (pl->prog != prog)
				continue;
			old_prog = prog;
			/* mark it deleted, so it's ignored while
			 * recomputing effective
			 */
			pl->prog = NULL;
			break;
		}
		if (!old_prog)
			return -ENOENT;
	} else {
		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog==NULL)
		 */
		pl = list_first_entry(progs, typeof(*pl), node);
		old_prog = pl->prog;
		pl->prog = NULL;
	}

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	/* now can actually delete it from this cgroup list */
	list_del(&pl->node);
	for_each_cgroup_storage_type(stype) {
		bpf_cgroup_storage_unlink(pl->storage[stype]);
		bpf_cgroup_storage_free(pl->storage[stype]);
	}
	kfree(pl);
	if (list_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[type] = 0;

	bpf_prog_put(old_prog);
	static_branch_dec(&cgroup_bpf_enabled_key);
	return 0;

cleanup:
	/* and restore back old_prog */
	pl->prog = old_prog;
	return err;
}

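/* copy the attach flags and the IDs of attached programs for @cgrp to
 * user space; with BPF_F_QUERY_EFFECTIVE the effective prog array is
 * reported instead of the locally attached list
 */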
/* Must be called with cgroup_mutex held to avoid races. */
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		       union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	struct list_head *progs = &cgrp->bpf.progs[type];
	u32 flags = cgrp->bpf.flags[type];
	int cnt, ret = 0, i;

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
	else
		cnt = prog_list_length(progs);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
		return -EFAULT;
	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
		/* return early if user requested only program count + flags */
		return 0;
	if (attr->query.prog_cnt < cnt) {
		cnt = attr->query.prog_cnt;
		ret = -ENOSPC;
	}

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
						   prog_ids, cnt);
	} else {
		struct bpf_prog_list *pl;
		u32 id;

		i = 0;
		list_for_each_entry(pl, progs, node) {
			id = pl->prog->aux->id;
			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
				return -EFAULT;
			if (++i == cnt)
				break;
		}
	}
	return ret;
}

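/* resolve attr->target_fd to a cgroup and attach @prog to it */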
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				attr->attach_flags);
	cgroup_put(cgrp);
	return ret;
}

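/* resolve attr->target_fd to a cgroup and detach the program referenced
 * by attr->attach_bpf_fd; an invalid FD is passed on as prog == NULL
 */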
int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
	if (prog)
		bpf_prog_put(prog);

	cgroup_put(cgrp);
	return ret;
}

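/* resolve attr->query.target_fd to a cgroup and run the query on it */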
int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_query(cgrp, attr, uattr);

	cgroup_put(cgrp);
	return ret;
}

/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @type: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @type must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum bpf_attach_type type)
{
	unsigned int offset = skb->data - skb_network_header(skb);
	struct sock *save_sk;
	void *saved_data_end;
	struct cgroup *cgrp;
	int ret;

	if (!sk || !sk_fullsock(sk))
		return 0;

	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);

	/* compute pointers for the bpf prog */
	bpf_compute_and_save_data_end(skb, &saved_data_end);

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				 __bpf_prog_run_save_cb);
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
	skb->sk = save_sk;
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);

/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);

/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and the
 *                                       sockaddr provided by the user
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @type: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 *
 * The socket is expected to be of type INET or INET6.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      enum bpf_attach_type type,
				      void *t_ctx)
{
	struct bpf_sock_addr_kern ctx = {
		.sk = sk,
		.uaddr = uaddr,
		.t_ctx = t_ctx,
	};
	struct sockaddr_storage unspec;
	struct cgroup *cgrp;
	int ret;

	/* Check socket family since not all sockets represent network
	 * endpoint (e.g. AF_UNIX).
	 */
	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	if (!ctx.uaddr) {
		memset(&unspec, 0, sizeof(unspec));
		ctx.uaddr = (struct sockaddr *)&unspec;
	}

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);

/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 * sk with connection information (IP addresses, etc.) May not contain
 * cgroup info if it is a req sock.
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
				 BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);

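/* run the device cgroup programs of the current task for an access of
 * @access type to the device @major:@minor of @dev_type; returns 0 when
 * the programs allow the access and 1 when they deny it
 */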
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int allow = 1;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				   BPF_PROG_RUN);
	rcu_read_unlock();

	return !allow;
}
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);

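/* base set of helpers shared by the cgroup program types below */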
static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_trace_printk:
		if (capable(CAP_SYS_ADMIN))
			return bpf_get_trace_printk_proto();
		/* fall through */
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return cgroup_base_func_proto(func_id, prog);
}

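/* context accesses for device programs must be aligned, read-only loads
 * within struct bpf_cgroup_dev_ctx; only access_type may be loaded
 * narrowly
 */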
static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (type == BPF_WRITE)
		return false;

	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
		return false;
	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
		bpf_ctx_record_field_size(info, size_default);
		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
			return false;
		break;
	default:
		if (size != size_default)
			return false;
	}

	return true;
}

const struct bpf_prog_ops cg_dev_prog_ops = {
};

const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto = cgroup_dev_func_proto,
	.is_valid_access = cgroup_dev_is_valid_access,
};

/**
 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @type: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases 0 is returned.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   enum bpf_attach_type type)
{
	struct bpf_sysctl_kern ctx = {
		.head = head,
		.table = table,
		.write = write,
	};
	struct cgroup *cgrp;
	int ret;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
	rcu_read_unlock();

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return cgroup_base_func_proto(func_id, prog);
}

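/* only aligned, read-only loads of the write field of struct bpf_sysctl
 * are allowed; narrow loads of that field are permitted
 */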
static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) ||
	    off % size || type != BPF_READ)
		return false;

	switch (off) {
	case offsetof(struct bpf_sysctl, write):
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	default:
		return false;
	}
}

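/* rewrite loads of struct bpf_sysctl fields into loads from the
 * in-kernel struct bpf_sysctl_kern context
 */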
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       FIELD_SIZEOF(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
	.get_func_proto = sysctl_func_proto,
	.is_valid_access = sysctl_is_valid_access,
	.convert_ctx_access = sysctl_convert_ctx_access,
};

const struct bpf_prog_ops cg_sysctl_prog_ops = {
};