Blame - kernel/bpf/cgroup.c - github.com/raspberrypi/raspberrypi-kernel

blob: 935274c86bfedaf56d2b0682277be8ab408aafd9 [file] [log] [blame]

Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	1	/*
				2	* Functions to manage eBPF programs attached to cgroups
				3	*
				4	* Copyright (c) 2016 Daniel Mack
				5	*
				6	* This file is subject to the terms and conditions of version 2 of the GNU
				7	* General Public License. See the file COPYING in the main directory of the
				8	* Linux distribution for more details.
				9	*/
				10
				11	#include <linux/kernel.h>
				12	#include <linux/atomic.h>
				13	#include <linux/cgroup.h>
				14	#include <linux/slab.h>
				15	#include <linux/bpf.h>
				16	#include <linux/bpf-cgroup.h>
				17	#include <net/sock.h>
				18
				19	DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
				20	EXPORT_SYMBOL(cgroup_bpf_enabled_key);
				21
				22	/**
				23	* cgroup_bpf_put() - put references of all bpf programs
				24	* @cgrp: the cgroup to modify
				25	*/
				26	void cgroup_bpf_put(struct cgroup *cgrp)
				27	{
				28	unsigned int type;
				29
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	30	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
				31	struct list_head *progs = &cgrp->bpf.progs[type];
				32	struct bpf_prog_list pl, tmp;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	33
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	34	list_for_each_entry_safe(pl, tmp, progs, node) {
				35	list_del(&pl->node);
				36	bpf_prog_put(pl->prog);
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	37	bpf_cgroup_storage_unlink(pl->storage);
				38	bpf_cgroup_storage_free(pl->storage);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	39	kfree(pl);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	40	static_branch_dec(&cgroup_bpf_enabled_key);
				41	}
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	42	bpf_prog_array_free(cgrp->bpf.effective[type]);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	43	}
				44	}
				45
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	46	/* count number of elements in the list.
				47	* it's slow but the list cannot be long
				48	*/
				49	static u32 prog_list_length(struct list_head *head)
				50	{
				51	struct bpf_prog_list *pl;
				52	u32 cnt = 0;
				53
				54	list_for_each_entry(pl, head, node) {
				55	if (!pl->prog)
				56	continue;
				57	cnt++;
				58	}
				59	return cnt;
				60	}
				61
				62	/* if parent has non-overridable prog attached,
				63	* disallow attaching new programs to the descendent cgroup.
				64	* if parent has overridable or multi-prog, allow attaching
				65	*/
				66	static bool hierarchy_allows_attach(struct cgroup *cgrp,
				67	enum bpf_attach_type type,
				68	u32 new_flags)
				69	{
				70	struct cgroup *p;
				71
				72	p = cgroup_parent(cgrp);
				73	if (!p)
				74	return true;
				75	do {
				76	u32 flags = p->bpf.flags[type];
				77	u32 cnt;
				78
				79	if (flags & BPF_F_ALLOW_MULTI)
				80	return true;
				81	cnt = prog_list_length(&p->bpf.progs[type]);
				82	WARN_ON_ONCE(cnt > 1);
				83	if (cnt == 1)
				84	return !!(flags & BPF_F_ALLOW_OVERRIDE);
				85	p = cgroup_parent(p);
				86	} while (p);
				87	return true;
				88	}
				89
				90	/* compute a chain of effective programs for a given cgroup:
				91	* start from the list of programs in this cgroup and add
				92	* all parent programs.
				93	* Note that parent's F_ALLOW_OVERRIDE-type program is yielding
				94	* to programs in this cgroup
				95	*/
				96	static int compute_effective_progs(struct cgroup *cgrp,
				97	enum bpf_attach_type type,
				98	struct bpf_prog_array __rcu **array)
				99	{
Roman Gushchin	3960f4f	2018-07-13 12:41:11 -0700	[diff] [blame]	100	struct bpf_prog_array *progs;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	101	struct bpf_prog_list *pl;
				102	struct cgroup *p = cgrp;
				103	int cnt = 0;
				104
				105	/* count number of effective programs by walking parents */
				106	do {
				107	if (cnt == 0 \|\| (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				108	cnt += prog_list_length(&p->bpf.progs[type]);
				109	p = cgroup_parent(p);
				110	} while (p);
				111
				112	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
				113	if (!progs)
				114	return -ENOMEM;
				115
				116	/* populate the array with effective progs */
				117	cnt = 0;
				118	p = cgrp;
				119	do {
				120	if (cnt == 0 \|\| (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				121	list_for_each_entry(pl,
				122	&p->bpf.progs[type], node) {
				123	if (!pl->prog)
				124	continue;
Roman Gushchin	3960f4f	2018-07-13 12:41:11 -0700	[diff] [blame]	125	progs->progs[cnt++] = pl->prog;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	126	}
				127	p = cgroup_parent(p);
				128	} while (p);
				129
Roman Gushchin	3960f4f	2018-07-13 12:41:11 -0700	[diff] [blame]	130	rcu_assign_pointer(*array, progs);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	131	return 0;
				132	}
				133
				134	static void activate_effective_progs(struct cgroup *cgrp,
				135	enum bpf_attach_type type,
				136	struct bpf_prog_array __rcu *array)
				137	{
				138	struct bpf_prog_array __rcu *old_array;
				139
				140	old_array = xchg(&cgrp->bpf.effective[type], array);
				141	/* free prog array after grace period, since __cgroup_bpf_run_*()
				142	* might be still walking the array
				143	*/
				144	bpf_prog_array_free(old_array);
				145	}
				146
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	147	/**
				148	* cgroup_bpf_inherit() - inherit effective programs from parent
				149	* @cgrp: the cgroup to modify
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	150	*/
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	151	int cgroup_bpf_inherit(struct cgroup *cgrp)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	152	{
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	153	/* has to use marco instead of const int, since compiler thinks
				154	* that array below is variable length
				155	*/
				156	#define NR ARRAY_SIZE(cgrp->bpf.effective)
				157	struct bpf_prog_array __rcu *arrays[NR] = {};
				158	int i;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	159
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	160	for (i = 0; i < NR; i++)
				161	INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	162
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	163	for (i = 0; i < NR; i++)
				164	if (compute_effective_progs(cgrp, i, &arrays[i]))
				165	goto cleanup;
				166
				167	for (i = 0; i < NR; i++)
				168	activate_effective_progs(cgrp, i, arrays[i]);
				169
				170	return 0;
				171	cleanup:
				172	for (i = 0; i < NR; i++)
				173	bpf_prog_array_free(arrays[i]);
				174	return -ENOMEM;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	175	}
				176
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	177	#define BPF_CGROUP_MAX_PROGS 64
				178
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	179	/**
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	180	* __cgroup_bpf_attach() - Attach the program to a cgroup, and
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	181	* propagate the change to descendants
				182	* @cgrp: The cgroup which descendants to traverse
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	183	* @prog: A program to attach
				184	* @type: Type of attach operation
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	185	*
				186	* Must be called with cgroup_mutex held.
				187	*/
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	188	int __cgroup_bpf_attach(struct cgroup cgrp, struct bpf_prog prog,
				189	enum bpf_attach_type type, u32 flags)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	190	{
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	191	struct list_head *progs = &cgrp->bpf.progs[type];
				192	struct bpf_prog *old_prog = NULL;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	193	struct bpf_cgroup_storage storage, old_storage = NULL;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	194	struct cgroup_subsys_state *css;
				195	struct bpf_prog_list *pl;
				196	bool pl_was_allocated;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	197	int err;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	198
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	199	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
				200	/* invalid combination */
				201	return -EINVAL;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	202
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	203	if (!hierarchy_allows_attach(cgrp, type, flags))
				204	return -EPERM;
				205
				206	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
				207	/* Disallow attaching non-overridable on top
				208	* of existing overridable in this cgroup.
				209	* Disallow attaching multi-prog if overridable or none
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	210	*/
				211	return -EPERM;
				212
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	213	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
				214	return -E2BIG;
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	215
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	216	storage = bpf_cgroup_storage_alloc(prog);
				217	if (IS_ERR(storage))
				218	return -ENOMEM;
				219
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	220	if (flags & BPF_F_ALLOW_MULTI) {
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	221	list_for_each_entry(pl, progs, node) {
				222	if (pl->prog == prog) {
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	223	/* disallow attaching the same prog twice */
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	224	bpf_cgroup_storage_free(storage);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	225	return -EINVAL;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	226	}
				227	}
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	228
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	229	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	230	if (!pl) {
				231	bpf_cgroup_storage_free(storage);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	232	return -ENOMEM;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	233	}
				234
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	235	pl_was_allocated = true;
				236	pl->prog = prog;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	237	pl->storage = storage;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	238	list_add_tail(&pl->node, progs);
				239	} else {
				240	if (list_empty(progs)) {
				241	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	242	if (!pl) {
				243	bpf_cgroup_storage_free(storage);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	244	return -ENOMEM;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	245	}
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	246	pl_was_allocated = true;
				247	list_add_tail(&pl->node, progs);
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	248	} else {
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	249	pl = list_first_entry(progs, typeof(*pl), node);
				250	old_prog = pl->prog;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	251	old_storage = pl->storage;
				252	bpf_cgroup_storage_unlink(old_storage);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	253	pl_was_allocated = false;
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	254	}
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	255	pl->prog = prog;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	256	pl->storage = storage;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	257	}
				258
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	259	cgrp->bpf.flags[type] = flags;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	260
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	261	/* allocate and recompute effective prog arrays */
				262	css_for_each_descendant_pre(css, &cgrp->self) {
				263	struct cgroup *desc = container_of(css, struct cgroup, self);
				264
				265	err = compute_effective_progs(desc, type, &desc->bpf.inactive);
				266	if (err)
				267	goto cleanup;
				268	}
				269
				270	/* all allocations were successful. Activate all prog arrays */
				271	css_for_each_descendant_pre(css, &cgrp->self) {
				272	struct cgroup *desc = container_of(css, struct cgroup, self);
				273
				274	activate_effective_progs(desc, type, desc->bpf.inactive);
				275	desc->bpf.inactive = NULL;
				276	}
				277
				278	static_branch_inc(&cgroup_bpf_enabled_key);
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	279	if (old_storage)
				280	bpf_cgroup_storage_free(old_storage);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	281	if (old_prog) {
				282	bpf_prog_put(old_prog);
				283	static_branch_dec(&cgroup_bpf_enabled_key);
				284	}
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	285	bpf_cgroup_storage_link(storage, cgrp, type);
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	286	return 0;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	287
				288	cleanup:
				289	/* oom while computing effective. Free all computed effective arrays
				290	* since they were not activated
				291	*/
				292	css_for_each_descendant_pre(css, &cgrp->self) {
				293	struct cgroup *desc = container_of(css, struct cgroup, self);
				294
				295	bpf_prog_array_free(desc->bpf.inactive);
				296	desc->bpf.inactive = NULL;
				297	}
				298
				299	/* and cleanup the prog list */
				300	pl->prog = old_prog;
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	301	bpf_cgroup_storage_free(pl->storage);
				302	pl->storage = old_storage;
				303	bpf_cgroup_storage_link(old_storage, cgrp, type);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	304	if (pl_was_allocated) {
				305	list_del(&pl->node);
				306	kfree(pl);
				307	}
				308	return err;
				309	}
				310
				311	/**
				312	* __cgroup_bpf_detach() - Detach the program from a cgroup, and
				313	* propagate the change to descendants
				314	* @cgrp: The cgroup which descendants to traverse
				315	* @prog: A program to detach or NULL
				316	* @type: Type of detach operation
				317	*
				318	* Must be called with cgroup_mutex held.
				319	*/
				320	int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
				321	enum bpf_attach_type type, u32 unused_flags)
				322	{
				323	struct list_head *progs = &cgrp->bpf.progs[type];
				324	u32 flags = cgrp->bpf.flags[type];
				325	struct bpf_prog *old_prog = NULL;
				326	struct cgroup_subsys_state *css;
				327	struct bpf_prog_list *pl;
				328	int err;
				329
				330	if (flags & BPF_F_ALLOW_MULTI) {
				331	if (!prog)
				332	/* to detach MULTI prog the user has to specify valid FD
				333	* of the program to be detached
				334	*/
				335	return -EINVAL;
				336	} else {
				337	if (list_empty(progs))
				338	/* report error when trying to detach and nothing is attached */
				339	return -ENOENT;
				340	}
				341
				342	if (flags & BPF_F_ALLOW_MULTI) {
				343	/* find the prog and detach it */
				344	list_for_each_entry(pl, progs, node) {
				345	if (pl->prog != prog)
				346	continue;
				347	old_prog = prog;
				348	/* mark it deleted, so it's ignored while
				349	* recomputing effective
				350	*/
				351	pl->prog = NULL;
				352	break;
				353	}
				354	if (!old_prog)
				355	return -ENOENT;
				356	} else {
				357	/* to maintain backward compatibility NONE and OVERRIDE cgroups
				358	* allow detaching with invalid FD (prog==NULL)
				359	*/
				360	pl = list_first_entry(progs, typeof(*pl), node);
				361	old_prog = pl->prog;
				362	pl->prog = NULL;
				363	}
				364
				365	/* allocate and recompute effective prog arrays */
				366	css_for_each_descendant_pre(css, &cgrp->self) {
				367	struct cgroup *desc = container_of(css, struct cgroup, self);
				368
				369	err = compute_effective_progs(desc, type, &desc->bpf.inactive);
				370	if (err)
				371	goto cleanup;
				372	}
				373
				374	/* all allocations were successful. Activate all prog arrays */
				375	css_for_each_descendant_pre(css, &cgrp->self) {
				376	struct cgroup *desc = container_of(css, struct cgroup, self);
				377
				378	activate_effective_progs(desc, type, desc->bpf.inactive);
				379	desc->bpf.inactive = NULL;
				380	}
				381
				382	/* now can actually delete it from this cgroup list */
				383	list_del(&pl->node);
Roman Gushchin	d7bf2c1	2018-08-02 14:27:20 -0700	[diff] [blame^]	384	bpf_cgroup_storage_unlink(pl->storage);
				385	bpf_cgroup_storage_free(pl->storage);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	386	kfree(pl);
				387	if (list_empty(progs))
				388	/* last program was detached, reset flags to zero */
				389	cgrp->bpf.flags[type] = 0;
				390
				391	bpf_prog_put(old_prog);
				392	static_branch_dec(&cgroup_bpf_enabled_key);
				393	return 0;
				394
				395	cleanup:
				396	/* oom while computing effective. Free all computed effective arrays
				397	* since they were not activated
				398	*/
				399	css_for_each_descendant_pre(css, &cgrp->self) {
				400	struct cgroup *desc = container_of(css, struct cgroup, self);
				401
				402	bpf_prog_array_free(desc->bpf.inactive);
				403	desc->bpf.inactive = NULL;
				404	}
				405
				406	/* and restore back old_prog */
				407	pl->prog = old_prog;
				408	return err;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	409	}
				410
Alexei Starovoitov	468e2f6	2017-10-02 22:50:22 -0700	[diff] [blame]	411	/* Must be called with cgroup_mutex held to avoid races. */
				412	int __cgroup_bpf_query(struct cgroup cgrp, const union bpf_attr attr,
				413	union bpf_attr __user *uattr)
				414	{
				415	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
				416	enum bpf_attach_type type = attr->query.attach_type;
				417	struct list_head *progs = &cgrp->bpf.progs[type];
				418	u32 flags = cgrp->bpf.flags[type];
				419	int cnt, ret = 0, i;
				420
				421	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
				422	cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
				423	else
				424	cnt = prog_list_length(progs);
				425
				426	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
				427	return -EFAULT;
				428	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
				429	return -EFAULT;
				430	if (attr->query.prog_cnt == 0 \|\| !prog_ids \|\| !cnt)
				431	/* return early if user requested only program count + flags */
				432	return 0;
				433	if (attr->query.prog_cnt < cnt) {
				434	cnt = attr->query.prog_cnt;
				435	ret = -ENOSPC;
				436	}
				437
				438	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
				439	return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
				440	prog_ids, cnt);
				441	} else {
				442	struct bpf_prog_list *pl;
				443	u32 id;
				444
				445	i = 0;
				446	list_for_each_entry(pl, progs, node) {
				447	id = pl->prog->aux->id;
				448	if (copy_to_user(prog_ids + i, &id, sizeof(id)))
				449	return -EFAULT;
				450	if (++i == cnt)
				451	break;
				452	}
				453	}
				454	return ret;
				455	}
				456
Sean Young	fdb5c45	2018-06-19 00:04:24 +0100	[diff] [blame]	457	int cgroup_bpf_prog_attach(const union bpf_attr *attr,
				458	enum bpf_prog_type ptype, struct bpf_prog *prog)
				459	{
				460	struct cgroup *cgrp;
				461	int ret;
				462
				463	cgrp = cgroup_get_from_fd(attr->target_fd);
				464	if (IS_ERR(cgrp))
				465	return PTR_ERR(cgrp);
				466
				467	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				468	attr->attach_flags);
				469	cgroup_put(cgrp);
				470	return ret;
				471	}
				472
				473	int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
				474	{
				475	struct bpf_prog *prog;
				476	struct cgroup *cgrp;
				477	int ret;
				478
				479	cgrp = cgroup_get_from_fd(attr->target_fd);
				480	if (IS_ERR(cgrp))
				481	return PTR_ERR(cgrp);
				482
				483	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
				484	if (IS_ERR(prog))
				485	prog = NULL;
				486
				487	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
				488	if (prog)
				489	bpf_prog_put(prog);
				490
				491	cgroup_put(cgrp);
				492	return ret;
				493	}
				494
				495	int cgroup_bpf_prog_query(const union bpf_attr *attr,
				496	union bpf_attr __user *uattr)
				497	{
				498	struct cgroup *cgrp;
				499	int ret;
				500
				501	cgrp = cgroup_get_from_fd(attr->query.target_fd);
				502	if (IS_ERR(cgrp))
				503	return PTR_ERR(cgrp);
				504
				505	ret = cgroup_bpf_query(cgrp, attr, uattr);
				506
				507	cgroup_put(cgrp);
				508	return ret;
				509	}
				510
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	511	/**
David Ahern	b2cd125	2016-12-01 08:48:03 -0800	[diff] [blame]	512	* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
Willem de Bruijn	8f917bb	2017-04-11 14:08:08 -0400	[diff] [blame]	513	* @sk: The socket sending or receiving traffic
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	514	* @skb: The skb that is being sent or received
				515	* @type: The type of program to be exectuted
				516	*
				517	* If no socket is passed, or the socket is not of type INET or INET6,
				518	* this function does nothing and returns 0.
				519	*
				520	* The program type passed in via @type must be suitable for network
				521	* filtering. No further check is performed to assert that.
				522	*
				523	* This function will return %-EPERM if any if an attached program was found
				524	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				525	*/
David Ahern	b2cd125	2016-12-01 08:48:03 -0800	[diff] [blame]	526	int __cgroup_bpf_run_filter_skb(struct sock *sk,
				527	struct sk_buff *skb,
				528	enum bpf_attach_type type)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	529	{
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	530	unsigned int offset = skb->data - skb_network_header(skb);
				531	struct sock *save_sk;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	532	struct cgroup *cgrp;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	533	int ret;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	534
				535	if (!sk \|\| !sk_fullsock(sk))
				536	return 0;
				537
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	538	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	539	return 0;
				540
				541	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	542	save_sk = skb->sk;
				543	skb->sk = sk;
				544	__skb_push(skb, offset);
				545	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				546	bpf_prog_run_save_cb);
				547	__skb_pull(skb, offset);
				548	skb->sk = save_sk;
				549	return ret == 1 ? 0 : -EPERM;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	550	}
David Ahern	b2cd125	2016-12-01 08:48:03 -0800	[diff] [blame]	551	EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
David Ahern	61023658	2016-12-01 08:48:04 -0800	[diff] [blame]	552
				553	/**
				554	* __cgroup_bpf_run_filter_sk() - Run a program on a sock
				555	* @sk: sock structure to manipulate
				556	* @type: The type of program to be exectuted
				557	*
				558	* socket is passed is expected to be of type INET or INET6.
				559	*
				560	* The program type passed in via @type must be suitable for sock
				561	* filtering. No further check is performed to assert that.
				562	*
				563	* This function will return %-EPERM if any if an attached program was found
				564	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				565	*/
				566	int __cgroup_bpf_run_filter_sk(struct sock *sk,
				567	enum bpf_attach_type type)
				568	{
				569	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	570	int ret;
David Ahern	61023658	2016-12-01 08:48:04 -0800	[diff] [blame]	571
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	572	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
				573	return ret == 1 ? 0 : -EPERM;
David Ahern	61023658	2016-12-01 08:48:04 -0800	[diff] [blame]	574	}
				575	EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	576
				577	/**
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	578	* __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
				579	* provided by user sockaddr
				580	* @sk: sock struct that will use sockaddr
				581	* @uaddr: sockaddr struct provided by user
				582	* @type: The type of program to be exectuted
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	583	* @t_ctx: Pointer to attach type specific context
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	584	*
				585	* socket is expected to be of type INET or INET6.
				586	*
				587	* This function will return %-EPERM if an attached program is found and
				588	* returned value != 1 during execution. In all other cases, 0 is returned.
				589	*/
				590	int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				591	struct sockaddr *uaddr,
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	592	enum bpf_attach_type type,
				593	void *t_ctx)
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	594	{
				595	struct bpf_sock_addr_kern ctx = {
				596	.sk = sk,
				597	.uaddr = uaddr,
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	598	.t_ctx = t_ctx,
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	599	};
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	600	struct sockaddr_storage unspec;
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	601	struct cgroup *cgrp;
				602	int ret;
				603
				604	/* Check socket family since not all sockets represent network
				605	* endpoint (e.g. AF_UNIX).
				606	*/
				607	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
				608	return 0;
				609
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	610	if (!ctx.uaddr) {
				611	memset(&unspec, 0, sizeof(unspec));
				612	ctx.uaddr = (struct sockaddr *)&unspec;
				613	}
				614
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	615	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
				616	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
				617
				618	return ret == 1 ? 0 : -EPERM;
				619	}
				620	EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
				621
				622	/**
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	623	* __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
				624	* @sk: socket to get cgroup from
				625	* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
				626	* sk with connection information (IP addresses, etc.) May not contain
				627	* cgroup info if it is a req sock.
				628	* @type: The type of program to be exectuted
				629	*
				630	* socket passed is expected to be of type INET or INET6.
				631	*
				632	* The program type passed in via @type must be suitable for sock_ops
				633	* filtering. No further check is performed to assert that.
				634	*
				635	* This function will return %-EPERM if any if an attached program was found
				636	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				637	*/
				638	int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				639	struct bpf_sock_ops_kern *sock_ops,
				640	enum bpf_attach_type type)
				641	{
				642	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	643	int ret;
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	644
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	645	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
				646	BPF_PROG_RUN);
				647	return ret == 1 ? 0 : -EPERM;
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	648	}
				649	EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	650
				651	int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				652	short access, enum bpf_attach_type type)
				653	{
				654	struct cgroup *cgrp;
				655	struct bpf_cgroup_dev_ctx ctx = {
				656	.access_type = (access << 16) \| dev_type,
				657	.major = major,
				658	.minor = minor,
				659	};
				660	int allow = 1;
				661
				662	rcu_read_lock();
				663	cgrp = task_dfl_cgroup(current);
				664	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				665	BPF_PROG_RUN);
				666	rcu_read_unlock();
				667
				668	return !allow;
				669	}
				670	EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
				671
				672	static const struct bpf_func_proto *
Andrey Ignatov	5e43f89	2018-03-30 15:08:00 -0700	[diff] [blame]	673	cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	674	{
				675	switch (func_id) {
				676	case BPF_FUNC_map_lookup_elem:
				677	return &bpf_map_lookup_elem_proto;
				678	case BPF_FUNC_map_update_elem:
				679	return &bpf_map_update_elem_proto;
				680	case BPF_FUNC_map_delete_elem:
				681	return &bpf_map_delete_elem_proto;
				682	case BPF_FUNC_get_current_uid_gid:
				683	return &bpf_get_current_uid_gid_proto;
				684	case BPF_FUNC_trace_printk:
				685	if (capable(CAP_SYS_ADMIN))
				686	return bpf_get_trace_printk_proto();
				687	default:
				688	return NULL;
				689	}
				690	}
				691
				692	static bool cgroup_dev_is_valid_access(int off, int size,
				693	enum bpf_access_type type,
Andrey Ignatov	5e43f89	2018-03-30 15:08:00 -0700	[diff] [blame]	694	const struct bpf_prog *prog,
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	695	struct bpf_insn_access_aux *info)
				696	{
Yonghong Song	06ef0cc	2017-12-18 10:13:44 -0800	[diff] [blame]	697	const int size_default = sizeof(__u32);
				698
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	699	if (type == BPF_WRITE)
				700	return false;
				701
				702	if (off < 0 \|\| off + size > sizeof(struct bpf_cgroup_dev_ctx))
				703	return false;
				704	/* The verifier guarantees that size > 0. */
				705	if (off % size != 0)
				706	return false;
Yonghong Song	06ef0cc	2017-12-18 10:13:44 -0800	[diff] [blame]	707
				708	switch (off) {
				709	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
				710	bpf_ctx_record_field_size(info, size_default);
				711	if (!bpf_ctx_narrow_access_ok(off, size, size_default))
				712	return false;
				713	break;
				714	default:
				715	if (size != size_default)
				716	return false;
				717	}
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	718
				719	return true;
				720	}
				721
				722	const struct bpf_prog_ops cg_dev_prog_ops = {
				723	};
				724
				725	const struct bpf_verifier_ops cg_dev_verifier_ops = {
				726	.get_func_proto = cgroup_dev_func_proto,
				727	.is_valid_access = cgroup_dev_is_valid_access,
				728	};