Blame - kernel/bpf/cgroup.c - github.com/raspberrypi/raspberrypi-kernel

blob: badabb0b435cb72747a4065b6d8c62c7c3419a2d [file] [log] [blame]

Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	1	/*
				2	* Functions to manage eBPF programs attached to cgroups
				3	*
				4	* Copyright (c) 2016 Daniel Mack
				5	*
				6	* This file is subject to the terms and conditions of version 2 of the GNU
				7	* General Public License. See the file COPYING in the main directory of the
				8	* Linux distribution for more details.
				9	*/
				10
				11	#include <linux/kernel.h>
				12	#include <linux/atomic.h>
				13	#include <linux/cgroup.h>
				14	#include <linux/slab.h>
				15	#include <linux/bpf.h>
				16	#include <linux/bpf-cgroup.h>
				17	#include <net/sock.h>
				18
				19	DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
				20	EXPORT_SYMBOL(cgroup_bpf_enabled_key);
				21
				22	/**
				23	* cgroup_bpf_put() - put references of all bpf programs
				24	* @cgrp: the cgroup to modify
				25	*/
				26	void cgroup_bpf_put(struct cgroup *cgrp)
				27	{
				28	unsigned int type;
				29
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	30	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
				31	struct list_head *progs = &cgrp->bpf.progs[type];
				32	struct bpf_prog_list pl, tmp;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	33
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	34	list_for_each_entry_safe(pl, tmp, progs, node) {
				35	list_del(&pl->node);
				36	bpf_prog_put(pl->prog);
				37	kfree(pl);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	38	static_branch_dec(&cgroup_bpf_enabled_key);
				39	}
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	40	bpf_prog_array_free(cgrp->bpf.effective[type]);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	41	}
				42	}
				43
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	44	/* count number of elements in the list.
				45	* it's slow but the list cannot be long
				46	*/
				47	static u32 prog_list_length(struct list_head *head)
				48	{
				49	struct bpf_prog_list *pl;
				50	u32 cnt = 0;
				51
				52	list_for_each_entry(pl, head, node) {
				53	if (!pl->prog)
				54	continue;
				55	cnt++;
				56	}
				57	return cnt;
				58	}
				59
				60	/* if parent has non-overridable prog attached,
				61	* disallow attaching new programs to the descendent cgroup.
				62	* if parent has overridable or multi-prog, allow attaching
				63	*/
				64	static bool hierarchy_allows_attach(struct cgroup *cgrp,
				65	enum bpf_attach_type type,
				66	u32 new_flags)
				67	{
				68	struct cgroup *p;
				69
				70	p = cgroup_parent(cgrp);
				71	if (!p)
				72	return true;
				73	do {
				74	u32 flags = p->bpf.flags[type];
				75	u32 cnt;
				76
				77	if (flags & BPF_F_ALLOW_MULTI)
				78	return true;
				79	cnt = prog_list_length(&p->bpf.progs[type]);
				80	WARN_ON_ONCE(cnt > 1);
				81	if (cnt == 1)
				82	return !!(flags & BPF_F_ALLOW_OVERRIDE);
				83	p = cgroup_parent(p);
				84	} while (p);
				85	return true;
				86	}
				87
				88	/* compute a chain of effective programs for a given cgroup:
				89	* start from the list of programs in this cgroup and add
				90	* all parent programs.
				91	* Note that parent's F_ALLOW_OVERRIDE-type program is yielding
				92	* to programs in this cgroup
				93	*/
				94	static int compute_effective_progs(struct cgroup *cgrp,
				95	enum bpf_attach_type type,
				96	struct bpf_prog_array __rcu **array)
				97	{
Roman Gushchin	3960f4f	2018-07-13 12:41:11 -0700	[diff] [blame^]	98	struct bpf_prog_array *progs;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	99	struct bpf_prog_list *pl;
				100	struct cgroup *p = cgrp;
				101	int cnt = 0;
				102
				103	/* count number of effective programs by walking parents */
				104	do {
				105	if (cnt == 0 \|\| (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				106	cnt += prog_list_length(&p->bpf.progs[type]);
				107	p = cgroup_parent(p);
				108	} while (p);
				109
				110	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
				111	if (!progs)
				112	return -ENOMEM;
				113
				114	/* populate the array with effective progs */
				115	cnt = 0;
				116	p = cgrp;
				117	do {
				118	if (cnt == 0 \|\| (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				119	list_for_each_entry(pl,
				120	&p->bpf.progs[type], node) {
				121	if (!pl->prog)
				122	continue;
Roman Gushchin	3960f4f	2018-07-13 12:41:11 -0700	[diff] [blame^]	123	progs->progs[cnt++] = pl->prog;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	124	}
				125	p = cgroup_parent(p);
				126	} while (p);
				127
Roman Gushchin	3960f4f	2018-07-13 12:41:11 -0700	[diff] [blame^]	128	rcu_assign_pointer(*array, progs);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	129	return 0;
				130	}
				131
				132	static void activate_effective_progs(struct cgroup *cgrp,
				133	enum bpf_attach_type type,
				134	struct bpf_prog_array __rcu *array)
				135	{
				136	struct bpf_prog_array __rcu *old_array;
				137
				138	old_array = xchg(&cgrp->bpf.effective[type], array);
				139	/* free prog array after grace period, since __cgroup_bpf_run_*()
				140	* might be still walking the array
				141	*/
				142	bpf_prog_array_free(old_array);
				143	}
				144
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	145	/**
				146	* cgroup_bpf_inherit() - inherit effective programs from parent
				147	* @cgrp: the cgroup to modify
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	148	*/
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	149	int cgroup_bpf_inherit(struct cgroup *cgrp)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	150	{
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	151	/* has to use marco instead of const int, since compiler thinks
				152	* that array below is variable length
				153	*/
				154	#define NR ARRAY_SIZE(cgrp->bpf.effective)
				155	struct bpf_prog_array __rcu *arrays[NR] = {};
				156	int i;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	157
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	158	for (i = 0; i < NR; i++)
				159	INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	160
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	161	for (i = 0; i < NR; i++)
				162	if (compute_effective_progs(cgrp, i, &arrays[i]))
				163	goto cleanup;
				164
				165	for (i = 0; i < NR; i++)
				166	activate_effective_progs(cgrp, i, arrays[i]);
				167
				168	return 0;
				169	cleanup:
				170	for (i = 0; i < NR; i++)
				171	bpf_prog_array_free(arrays[i]);
				172	return -ENOMEM;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	173	}
				174
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	175	#define BPF_CGROUP_MAX_PROGS 64
				176
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	177	/**
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	178	* __cgroup_bpf_attach() - Attach the program to a cgroup, and
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	179	* propagate the change to descendants
				180	* @cgrp: The cgroup which descendants to traverse
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	181	* @prog: A program to attach
				182	* @type: Type of attach operation
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	183	*
				184	* Must be called with cgroup_mutex held.
				185	*/
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	186	int __cgroup_bpf_attach(struct cgroup cgrp, struct bpf_prog prog,
				187	enum bpf_attach_type type, u32 flags)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	188	{
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	189	struct list_head *progs = &cgrp->bpf.progs[type];
				190	struct bpf_prog *old_prog = NULL;
				191	struct cgroup_subsys_state *css;
				192	struct bpf_prog_list *pl;
				193	bool pl_was_allocated;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	194	int err;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	195
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	196	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
				197	/* invalid combination */
				198	return -EINVAL;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	199
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	200	if (!hierarchy_allows_attach(cgrp, type, flags))
				201	return -EPERM;
				202
				203	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
				204	/* Disallow attaching non-overridable on top
				205	* of existing overridable in this cgroup.
				206	* Disallow attaching multi-prog if overridable or none
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	207	*/
				208	return -EPERM;
				209
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	210	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
				211	return -E2BIG;
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	212
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	213	if (flags & BPF_F_ALLOW_MULTI) {
				214	list_for_each_entry(pl, progs, node)
				215	if (pl->prog == prog)
				216	/* disallow attaching the same prog twice */
				217	return -EINVAL;
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	218
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	219	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
				220	if (!pl)
				221	return -ENOMEM;
				222	pl_was_allocated = true;
				223	pl->prog = prog;
				224	list_add_tail(&pl->node, progs);
				225	} else {
				226	if (list_empty(progs)) {
				227	pl = kmalloc(sizeof(*pl), GFP_KERNEL);
				228	if (!pl)
				229	return -ENOMEM;
				230	pl_was_allocated = true;
				231	list_add_tail(&pl->node, progs);
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	232	} else {
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	233	pl = list_first_entry(progs, typeof(*pl), node);
				234	old_prog = pl->prog;
				235	pl_was_allocated = false;
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	236	}
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	237	pl->prog = prog;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	238	}
				239
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	240	cgrp->bpf.flags[type] = flags;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	241
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	242	/* allocate and recompute effective prog arrays */
				243	css_for_each_descendant_pre(css, &cgrp->self) {
				244	struct cgroup *desc = container_of(css, struct cgroup, self);
				245
				246	err = compute_effective_progs(desc, type, &desc->bpf.inactive);
				247	if (err)
				248	goto cleanup;
				249	}
				250
				251	/* all allocations were successful. Activate all prog arrays */
				252	css_for_each_descendant_pre(css, &cgrp->self) {
				253	struct cgroup *desc = container_of(css, struct cgroup, self);
				254
				255	activate_effective_progs(desc, type, desc->bpf.inactive);
				256	desc->bpf.inactive = NULL;
				257	}
				258
				259	static_branch_inc(&cgroup_bpf_enabled_key);
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	260	if (old_prog) {
				261	bpf_prog_put(old_prog);
				262	static_branch_dec(&cgroup_bpf_enabled_key);
				263	}
Alexei Starovoitov	7f67763	2017-02-10 20:28:24 -0800	[diff] [blame]	264	return 0;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	265
				266	cleanup:
				267	/* oom while computing effective. Free all computed effective arrays
				268	* since they were not activated
				269	*/
				270	css_for_each_descendant_pre(css, &cgrp->self) {
				271	struct cgroup *desc = container_of(css, struct cgroup, self);
				272
				273	bpf_prog_array_free(desc->bpf.inactive);
				274	desc->bpf.inactive = NULL;
				275	}
				276
				277	/* and cleanup the prog list */
				278	pl->prog = old_prog;
				279	if (pl_was_allocated) {
				280	list_del(&pl->node);
				281	kfree(pl);
				282	}
				283	return err;
				284	}
				285
				286	/**
				287	* __cgroup_bpf_detach() - Detach the program from a cgroup, and
				288	* propagate the change to descendants
				289	* @cgrp: The cgroup which descendants to traverse
				290	* @prog: A program to detach or NULL
				291	* @type: Type of detach operation
				292	*
				293	* Must be called with cgroup_mutex held.
				294	*/
				295	int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
				296	enum bpf_attach_type type, u32 unused_flags)
				297	{
				298	struct list_head *progs = &cgrp->bpf.progs[type];
				299	u32 flags = cgrp->bpf.flags[type];
				300	struct bpf_prog *old_prog = NULL;
				301	struct cgroup_subsys_state *css;
				302	struct bpf_prog_list *pl;
				303	int err;
				304
				305	if (flags & BPF_F_ALLOW_MULTI) {
				306	if (!prog)
				307	/* to detach MULTI prog the user has to specify valid FD
				308	* of the program to be detached
				309	*/
				310	return -EINVAL;
				311	} else {
				312	if (list_empty(progs))
				313	/* report error when trying to detach and nothing is attached */
				314	return -ENOENT;
				315	}
				316
				317	if (flags & BPF_F_ALLOW_MULTI) {
				318	/* find the prog and detach it */
				319	list_for_each_entry(pl, progs, node) {
				320	if (pl->prog != prog)
				321	continue;
				322	old_prog = prog;
				323	/* mark it deleted, so it's ignored while
				324	* recomputing effective
				325	*/
				326	pl->prog = NULL;
				327	break;
				328	}
				329	if (!old_prog)
				330	return -ENOENT;
				331	} else {
				332	/* to maintain backward compatibility NONE and OVERRIDE cgroups
				333	* allow detaching with invalid FD (prog==NULL)
				334	*/
				335	pl = list_first_entry(progs, typeof(*pl), node);
				336	old_prog = pl->prog;
				337	pl->prog = NULL;
				338	}
				339
				340	/* allocate and recompute effective prog arrays */
				341	css_for_each_descendant_pre(css, &cgrp->self) {
				342	struct cgroup *desc = container_of(css, struct cgroup, self);
				343
				344	err = compute_effective_progs(desc, type, &desc->bpf.inactive);
				345	if (err)
				346	goto cleanup;
				347	}
				348
				349	/* all allocations were successful. Activate all prog arrays */
				350	css_for_each_descendant_pre(css, &cgrp->self) {
				351	struct cgroup *desc = container_of(css, struct cgroup, self);
				352
				353	activate_effective_progs(desc, type, desc->bpf.inactive);
				354	desc->bpf.inactive = NULL;
				355	}
				356
				357	/* now can actually delete it from this cgroup list */
				358	list_del(&pl->node);
				359	kfree(pl);
				360	if (list_empty(progs))
				361	/* last program was detached, reset flags to zero */
				362	cgrp->bpf.flags[type] = 0;
				363
				364	bpf_prog_put(old_prog);
				365	static_branch_dec(&cgroup_bpf_enabled_key);
				366	return 0;
				367
				368	cleanup:
				369	/* oom while computing effective. Free all computed effective arrays
				370	* since they were not activated
				371	*/
				372	css_for_each_descendant_pre(css, &cgrp->self) {
				373	struct cgroup *desc = container_of(css, struct cgroup, self);
				374
				375	bpf_prog_array_free(desc->bpf.inactive);
				376	desc->bpf.inactive = NULL;
				377	}
				378
				379	/* and restore back old_prog */
				380	pl->prog = old_prog;
				381	return err;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	382	}
				383
Alexei Starovoitov	468e2f6	2017-10-02 22:50:22 -0700	[diff] [blame]	384	/* Must be called with cgroup_mutex held to avoid races. */
				385	int __cgroup_bpf_query(struct cgroup cgrp, const union bpf_attr attr,
				386	union bpf_attr __user *uattr)
				387	{
				388	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
				389	enum bpf_attach_type type = attr->query.attach_type;
				390	struct list_head *progs = &cgrp->bpf.progs[type];
				391	u32 flags = cgrp->bpf.flags[type];
				392	int cnt, ret = 0, i;
				393
				394	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
				395	cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
				396	else
				397	cnt = prog_list_length(progs);
				398
				399	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
				400	return -EFAULT;
				401	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
				402	return -EFAULT;
				403	if (attr->query.prog_cnt == 0 \|\| !prog_ids \|\| !cnt)
				404	/* return early if user requested only program count + flags */
				405	return 0;
				406	if (attr->query.prog_cnt < cnt) {
				407	cnt = attr->query.prog_cnt;
				408	ret = -ENOSPC;
				409	}
				410
				411	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
				412	return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
				413	prog_ids, cnt);
				414	} else {
				415	struct bpf_prog_list *pl;
				416	u32 id;
				417
				418	i = 0;
				419	list_for_each_entry(pl, progs, node) {
				420	id = pl->prog->aux->id;
				421	if (copy_to_user(prog_ids + i, &id, sizeof(id)))
				422	return -EFAULT;
				423	if (++i == cnt)
				424	break;
				425	}
				426	}
				427	return ret;
				428	}
				429
Sean Young	fdb5c45	2018-06-19 00:04:24 +0100	[diff] [blame]	430	int cgroup_bpf_prog_attach(const union bpf_attr *attr,
				431	enum bpf_prog_type ptype, struct bpf_prog *prog)
				432	{
				433	struct cgroup *cgrp;
				434	int ret;
				435
				436	cgrp = cgroup_get_from_fd(attr->target_fd);
				437	if (IS_ERR(cgrp))
				438	return PTR_ERR(cgrp);
				439
				440	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
				441	attr->attach_flags);
				442	cgroup_put(cgrp);
				443	return ret;
				444	}
				445
				446	int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
				447	{
				448	struct bpf_prog *prog;
				449	struct cgroup *cgrp;
				450	int ret;
				451
				452	cgrp = cgroup_get_from_fd(attr->target_fd);
				453	if (IS_ERR(cgrp))
				454	return PTR_ERR(cgrp);
				455
				456	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
				457	if (IS_ERR(prog))
				458	prog = NULL;
				459
				460	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
				461	if (prog)
				462	bpf_prog_put(prog);
				463
				464	cgroup_put(cgrp);
				465	return ret;
				466	}
				467
				468	int cgroup_bpf_prog_query(const union bpf_attr *attr,
				469	union bpf_attr __user *uattr)
				470	{
				471	struct cgroup *cgrp;
				472	int ret;
				473
				474	cgrp = cgroup_get_from_fd(attr->query.target_fd);
				475	if (IS_ERR(cgrp))
				476	return PTR_ERR(cgrp);
				477
				478	ret = cgroup_bpf_query(cgrp, attr, uattr);
				479
				480	cgroup_put(cgrp);
				481	return ret;
				482	}
				483
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	484	/**
David Ahern	b2cd125	2016-12-01 08:48:03 -0800	[diff] [blame]	485	* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
Willem de Bruijn	8f917bb	2017-04-11 14:08:08 -0400	[diff] [blame]	486	* @sk: The socket sending or receiving traffic
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	487	* @skb: The skb that is being sent or received
				488	* @type: The type of program to be exectuted
				489	*
				490	* If no socket is passed, or the socket is not of type INET or INET6,
				491	* this function does nothing and returns 0.
				492	*
				493	* The program type passed in via @type must be suitable for network
				494	* filtering. No further check is performed to assert that.
				495	*
				496	* This function will return %-EPERM if any if an attached program was found
				497	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				498	*/
David Ahern	b2cd125	2016-12-01 08:48:03 -0800	[diff] [blame]	499	int __cgroup_bpf_run_filter_skb(struct sock *sk,
				500	struct sk_buff *skb,
				501	enum bpf_attach_type type)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	502	{
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	503	unsigned int offset = skb->data - skb_network_header(skb);
				504	struct sock *save_sk;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	505	struct cgroup *cgrp;
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	506	int ret;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	507
				508	if (!sk \|\| !sk_fullsock(sk))
				509	return 0;
				510
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	511	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	512	return 0;
				513
				514	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	515	save_sk = skb->sk;
				516	skb->sk = sk;
				517	__skb_push(skb, offset);
				518	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
				519	bpf_prog_run_save_cb);
				520	__skb_pull(skb, offset);
				521	skb->sk = save_sk;
				522	return ret == 1 ? 0 : -EPERM;
Daniel Mack	3007098	2016-11-23 16:52:26 +0100	[diff] [blame]	523	}
David Ahern	b2cd125	2016-12-01 08:48:03 -0800	[diff] [blame]	524	EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
David Ahern	61023658	2016-12-01 08:48:04 -0800	[diff] [blame]	525
				526	/**
				527	* __cgroup_bpf_run_filter_sk() - Run a program on a sock
				528	* @sk: sock structure to manipulate
				529	* @type: The type of program to be exectuted
				530	*
				531	* socket is passed is expected to be of type INET or INET6.
				532	*
				533	* The program type passed in via @type must be suitable for sock
				534	* filtering. No further check is performed to assert that.
				535	*
				536	* This function will return %-EPERM if any if an attached program was found
				537	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				538	*/
				539	int __cgroup_bpf_run_filter_sk(struct sock *sk,
				540	enum bpf_attach_type type)
				541	{
				542	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	543	int ret;
David Ahern	61023658	2016-12-01 08:48:04 -0800	[diff] [blame]	544
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	545	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
				546	return ret == 1 ? 0 : -EPERM;
David Ahern	61023658	2016-12-01 08:48:04 -0800	[diff] [blame]	547	}
				548	EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	549
				550	/**
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	551	* __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
				552	* provided by user sockaddr
				553	* @sk: sock struct that will use sockaddr
				554	* @uaddr: sockaddr struct provided by user
				555	* @type: The type of program to be exectuted
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	556	* @t_ctx: Pointer to attach type specific context
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	557	*
				558	* socket is expected to be of type INET or INET6.
				559	*
				560	* This function will return %-EPERM if an attached program is found and
				561	* returned value != 1 during execution. In all other cases, 0 is returned.
				562	*/
				563	int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				564	struct sockaddr *uaddr,
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	565	enum bpf_attach_type type,
				566	void *t_ctx)
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	567	{
				568	struct bpf_sock_addr_kern ctx = {
				569	.sk = sk,
				570	.uaddr = uaddr,
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	571	.t_ctx = t_ctx,
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	572	};
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	573	struct sockaddr_storage unspec;
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	574	struct cgroup *cgrp;
				575	int ret;
				576
				577	/* Check socket family since not all sockets represent network
				578	* endpoint (e.g. AF_UNIX).
				579	*/
				580	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
				581	return 0;
				582
Andrey Ignatov	1cedee1	2018-05-25 08:55:23 -0700	[diff] [blame]	583	if (!ctx.uaddr) {
				584	memset(&unspec, 0, sizeof(unspec));
				585	ctx.uaddr = (struct sockaddr *)&unspec;
				586	}
				587
Andrey Ignatov	4fbac77	2018-03-30 15:08:02 -0700	[diff] [blame]	588	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
				589	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
				590
				591	return ret == 1 ? 0 : -EPERM;
				592	}
				593	EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
				594
				595	/**
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	596	* __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
				597	* @sk: socket to get cgroup from
				598	* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
				599	* sk with connection information (IP addresses, etc.) May not contain
				600	* cgroup info if it is a req sock.
				601	* @type: The type of program to be exectuted
				602	*
				603	* socket passed is expected to be of type INET or INET6.
				604	*
				605	* The program type passed in via @type must be suitable for sock_ops
				606	* filtering. No further check is performed to assert that.
				607	*
				608	* This function will return %-EPERM if any if an attached program was found
				609	* and if it returned != 1 during execution. In all other cases, 0 is returned.
				610	*/
				611	int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				612	struct bpf_sock_ops_kern *sock_ops,
				613	enum bpf_attach_type type)
				614	{
				615	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	616	int ret;
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	617
Alexei Starovoitov	324bda9e6	2017-10-02 22:50:21 -0700	[diff] [blame]	618	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
				619	BPF_PROG_RUN);
				620	return ret == 1 ? 0 : -EPERM;
Lawrence Brakmo	40304b2	2017-06-30 20:02:40 -0700	[diff] [blame]	621	}
				622	EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	623
				624	int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				625	short access, enum bpf_attach_type type)
				626	{
				627	struct cgroup *cgrp;
				628	struct bpf_cgroup_dev_ctx ctx = {
				629	.access_type = (access << 16) \| dev_type,
				630	.major = major,
				631	.minor = minor,
				632	};
				633	int allow = 1;
				634
				635	rcu_read_lock();
				636	cgrp = task_dfl_cgroup(current);
				637	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				638	BPF_PROG_RUN);
				639	rcu_read_unlock();
				640
				641	return !allow;
				642	}
				643	EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
				644
				645	static const struct bpf_func_proto *
Andrey Ignatov	5e43f89	2018-03-30 15:08:00 -0700	[diff] [blame]	646	cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	647	{
				648	switch (func_id) {
				649	case BPF_FUNC_map_lookup_elem:
				650	return &bpf_map_lookup_elem_proto;
				651	case BPF_FUNC_map_update_elem:
				652	return &bpf_map_update_elem_proto;
				653	case BPF_FUNC_map_delete_elem:
				654	return &bpf_map_delete_elem_proto;
				655	case BPF_FUNC_get_current_uid_gid:
				656	return &bpf_get_current_uid_gid_proto;
				657	case BPF_FUNC_trace_printk:
				658	if (capable(CAP_SYS_ADMIN))
				659	return bpf_get_trace_printk_proto();
				660	default:
				661	return NULL;
				662	}
				663	}
				664
				665	static bool cgroup_dev_is_valid_access(int off, int size,
				666	enum bpf_access_type type,
Andrey Ignatov	5e43f89	2018-03-30 15:08:00 -0700	[diff] [blame]	667	const struct bpf_prog *prog,
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	668	struct bpf_insn_access_aux *info)
				669	{
Yonghong Song	06ef0cc	2017-12-18 10:13:44 -0800	[diff] [blame]	670	const int size_default = sizeof(__u32);
				671
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	672	if (type == BPF_WRITE)
				673	return false;
				674
				675	if (off < 0 \|\| off + size > sizeof(struct bpf_cgroup_dev_ctx))
				676	return false;
				677	/* The verifier guarantees that size > 0. */
				678	if (off % size != 0)
				679	return false;
Yonghong Song	06ef0cc	2017-12-18 10:13:44 -0800	[diff] [blame]	680
				681	switch (off) {
				682	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
				683	bpf_ctx_record_field_size(info, size_default);
				684	if (!bpf_ctx_narrow_access_ok(off, size, size_default))
				685	return false;
				686	break;
				687	default:
				688	if (size != size_default)
				689	return false;
				690	}
Roman Gushchin	ebc614f	2017-11-05 08:15:32 -0500	[diff] [blame]	691
				692	return true;
				693	}
				694
				695	const struct bpf_prog_ops cg_dev_prog_ops = {
				696	};
				697
				698	const struct bpf_verifier_ops cg_dev_verifier_ops = {
				699	.get_func_proto = cgroup_dev_func_proto,
				700	.is_valid_access = cgroup_dev_is_valid_access,
				701	};