Blame - kernel/bpf/local_storage.c - github.com/raspberrypi/raspberrypi-kernel

blob: 944eb297465fd1c648fc41069e8ea6bb3ff6c976 [file] [log] [blame]

Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	1	//SPDX-License-Identifier: GPL-2.0
				2	#include <linux/bpf-cgroup.h>
				3	#include <linux/bpf.h>
				4	#include <linux/bug.h>
				5	#include <linux/filter.h>
				6	#include <linux/mm.h>
				7	#include <linux/rbtree.h>
				8	#include <linux/slab.h>
				9
Roman Gushchin	f294b37	2018-09-28 14:45:40 +0000	[diff] [blame]	10	DEFINE_PER_CPU(struct bpf_cgroup_storage*,
				11	bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
Roman Gushchin	aa0ad5b	2018-08-02 14:27:19 -0700	[diff] [blame]	12
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	13	#ifdef CONFIG_CGROUP_BPF
				14
				15	#define LOCAL_STORAGE_CREATE_FLAG_MASK \
				16	(BPF_F_NUMA_NODE \| BPF_F_RDONLY \| BPF_F_WRONLY)
				17
				18	struct bpf_cgroup_storage_map {
				19	struct bpf_map map;
				20
				21	spinlock_t lock;
				22	struct bpf_prog *prog;
				23	struct rb_root root;
				24	struct list_head list;
				25	};
				26
				27	static struct bpf_cgroup_storage_map map_to_storage(struct bpf_map map)
				28	{
				29	return container_of(map, struct bpf_cgroup_storage_map, map);
				30	}
				31
				32	static int bpf_cgroup_storage_key_cmp(
				33	const struct bpf_cgroup_storage_key *key1,
				34	const struct bpf_cgroup_storage_key *key2)
				35	{
				36	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
				37	return -1;
				38	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
				39	return 1;
				40	else if (key1->attach_type < key2->attach_type)
				41	return -1;
				42	else if (key1->attach_type > key2->attach_type)
				43	return 1;
				44	return 0;
				45	}
				46
				47	static struct bpf_cgroup_storage *cgroup_storage_lookup(
				48	struct bpf_cgroup_storage_map map, struct bpf_cgroup_storage_key key,
				49	bool locked)
				50	{
				51	struct rb_root *root = &map->root;
				52	struct rb_node *node;
				53
				54	if (!locked)
				55	spin_lock_bh(&map->lock);
				56
				57	node = root->rb_node;
				58	while (node) {
				59	struct bpf_cgroup_storage *storage;
				60
				61	storage = container_of(node, struct bpf_cgroup_storage, node);
				62
				63	switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
				64	case -1:
				65	node = node->rb_left;
				66	break;
				67	case 1:
				68	node = node->rb_right;
				69	break;
				70	default:
				71	if (!locked)
				72	spin_unlock_bh(&map->lock);
				73	return storage;
				74	}
				75	}
				76
				77	if (!locked)
				78	spin_unlock_bh(&map->lock);
				79
				80	return NULL;
				81	}
				82
				83	static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				84	struct bpf_cgroup_storage *storage)
				85	{
				86	struct rb_root *root = &map->root;
				87	struct rb_node *new = &(root->rb_node), parent = NULL;
				88
				89	while (*new) {
				90	struct bpf_cgroup_storage *this;
				91
				92	this = container_of(*new, struct bpf_cgroup_storage, node);
				93
				94	parent = *new;
				95	switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
				96	case -1:
				97	new = &((*new)->rb_left);
				98	break;
				99	case 1:
				100	new = &((*new)->rb_right);
				101	break;
				102	default:
				103	return -EEXIST;
				104	}
				105	}
				106
				107	rb_link_node(&storage->node, parent, new);
				108	rb_insert_color(&storage->node, root);
				109
				110	return 0;
				111	}
				112
				113	static void cgroup_storage_lookup_elem(struct bpf_map _map, void *_key)
				114	{
				115	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				116	struct bpf_cgroup_storage_key *key = _key;
				117	struct bpf_cgroup_storage *storage;
				118
				119	storage = cgroup_storage_lookup(map, key, false);
				120	if (!storage)
				121	return NULL;
				122
				123	return &READ_ONCE(storage->buf)->data[0];
				124	}
				125
				126	static int cgroup_storage_update_elem(struct bpf_map map, void _key,
				127	void *value, u64 flags)
				128	{
				129	struct bpf_cgroup_storage_key *key = _key;
				130	struct bpf_cgroup_storage *storage;
				131	struct bpf_storage_buffer *new;
				132
				133	if (flags & BPF_NOEXIST)
				134	return -EINVAL;
				135
				136	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
				137	key, false);
				138	if (!storage)
				139	return -ENOENT;
				140
				141	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
				142	map->value_size, __GFP_ZERO \| GFP_USER,
				143	map->numa_node);
				144	if (!new)
				145	return -ENOMEM;
				146
				147	memcpy(&new->data[0], value, map->value_size);
				148
				149	new = xchg(&storage->buf, new);
				150	kfree_rcu(new, rcu);
				151
				152	return 0;
				153	}
				154
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	155	int bpf_percpu_cgroup_storage_copy(struct bpf_map _map, void _key,
				156	void *value)
				157	{
				158	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				159	struct bpf_cgroup_storage_key *key = _key;
				160	struct bpf_cgroup_storage *storage;
				161	int cpu, off = 0;
				162	u32 size;
				163
				164	rcu_read_lock();
				165	storage = cgroup_storage_lookup(map, key, false);
				166	if (!storage) {
				167	rcu_read_unlock();
				168	return -ENOENT;
				169	}
				170
				171	/* per_cpu areas are zero-filled and bpf programs can only
				172	* access 'value_size' of them, so copying rounded areas
				173	* will not leak any kernel data
				174	*/
				175	size = round_up(_map->value_size, 8);
				176	for_each_possible_cpu(cpu) {
				177	bpf_long_memcpy(value + off,
				178	per_cpu_ptr(storage->percpu_buf, cpu), size);
				179	off += size;
				180	}
				181	rcu_read_unlock();
				182	return 0;
				183	}
				184
				185	int bpf_percpu_cgroup_storage_update(struct bpf_map _map, void _key,
				186	void *value, u64 map_flags)
				187	{
				188	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				189	struct bpf_cgroup_storage_key *key = _key;
				190	struct bpf_cgroup_storage *storage;
				191	int cpu, off = 0;
				192	u32 size;
				193
				194	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
				195	return -EINVAL;
				196
				197	rcu_read_lock();
				198	storage = cgroup_storage_lookup(map, key, false);
				199	if (!storage) {
				200	rcu_read_unlock();
				201	return -ENOENT;
				202	}
				203
				204	/* the user space will provide round_up(value_size, 8) bytes that
				205	* will be copied into per-cpu area. bpf programs can only access
				206	* value_size of it. During lookup the same extra bytes will be
				207	* returned or zeros which were zero-filled by percpu_alloc,
				208	* so no kernel data leaks possible
				209	*/
				210	size = round_up(_map->value_size, 8);
				211	for_each_possible_cpu(cpu) {
				212	bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				213	value + off, size);
				214	off += size;
				215	}
				216	rcu_read_unlock();
				217	return 0;
				218	}
				219
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	220	static int cgroup_storage_get_next_key(struct bpf_map _map, void _key,
				221	void *_next_key)
				222	{
				223	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				224	struct bpf_cgroup_storage_key *key = _key;
				225	struct bpf_cgroup_storage_key *next = _next_key;
				226	struct bpf_cgroup_storage *storage;
				227
				228	spin_lock_bh(&map->lock);
				229
				230	if (list_empty(&map->list))
				231	goto enoent;
				232
				233	if (key) {
				234	storage = cgroup_storage_lookup(map, key, true);
				235	if (!storage)
				236	goto enoent;
				237
				238	storage = list_next_entry(storage, list);
				239	if (!storage)
				240	goto enoent;
				241	} else {
				242	storage = list_first_entry(&map->list,
				243	struct bpf_cgroup_storage, list);
				244	}
				245
				246	spin_unlock_bh(&map->lock);
				247	next->attach_type = storage->key.attach_type;
				248	next->cgroup_inode_id = storage->key.cgroup_inode_id;
				249	return 0;
				250
				251	enoent:
				252	spin_unlock_bh(&map->lock);
				253	return -ENOENT;
				254	}
				255
				256	static struct bpf_map cgroup_storage_map_alloc(union bpf_attr attr)
				257	{
				258	int numa_node = bpf_map_attr_numa_node(attr);
				259	struct bpf_cgroup_storage_map *map;
				260
				261	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
				262	return ERR_PTR(-EINVAL);
				263
				264	if (attr->value_size > PAGE_SIZE)
				265	return ERR_PTR(-E2BIG);
				266
				267	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
				268	/* reserved bits should not be used */
				269	return ERR_PTR(-EINVAL);
				270
				271	if (attr->max_entries)
				272	/* max_entries is not used and enforced to be 0 */
				273	return ERR_PTR(-EINVAL);
				274
				275	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
				276	__GFP_ZERO \| GFP_USER, numa_node);
				277	if (!map)
				278	return ERR_PTR(-ENOMEM);
				279
				280	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				281	PAGE_SIZE) >> PAGE_SHIFT;
				282
				283	/* copy mandatory map attributes */
				284	bpf_map_init_from_attr(&map->map, attr);
				285
				286	spin_lock_init(&map->lock);
				287	map->root = RB_ROOT;
				288	INIT_LIST_HEAD(&map->list);
				289
				290	return &map->map;
				291	}
				292
				293	static void cgroup_storage_map_free(struct bpf_map *_map)
				294	{
				295	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				296
				297	WARN_ON(!RB_EMPTY_ROOT(&map->root));
				298	WARN_ON(!list_empty(&map->list));
				299
				300	kfree(map);
				301	}
				302
				303	static int cgroup_storage_delete_elem(struct bpf_map map, void key)
				304	{
				305	return -EINVAL;
				306	}
				307
				308	const struct bpf_map_ops cgroup_storage_map_ops = {
				309	.map_alloc = cgroup_storage_map_alloc,
				310	.map_free = cgroup_storage_map_free,
				311	.map_get_next_key = cgroup_storage_get_next_key,
				312	.map_lookup_elem = cgroup_storage_lookup_elem,
				313	.map_update_elem = cgroup_storage_update_elem,
				314	.map_delete_elem = cgroup_storage_delete_elem,
Daniel Borkmann	e8d2bec	2018-08-12 01:59:17 +0200	[diff] [blame]	315	.map_check_btf = map_check_no_btf,
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	316	};
				317
				318	int bpf_cgroup_storage_assign(struct bpf_prog prog, struct bpf_map _map)
				319	{
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	320	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	321	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				322	int ret = -EBUSY;
				323
				324	spin_lock_bh(&map->lock);
				325
				326	if (map->prog && map->prog != prog)
				327	goto unlock;
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	328	if (prog->aux->cgroup_storage[stype] &&
				329	prog->aux->cgroup_storage[stype] != _map)
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	330	goto unlock;
				331
				332	map->prog = prog;
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	333	prog->aux->cgroup_storage[stype] = _map;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	334	ret = 0;
				335	unlock:
				336	spin_unlock_bh(&map->lock);
				337
				338	return ret;
				339	}
				340
				341	void bpf_cgroup_storage_release(struct bpf_prog prog, struct bpf_map _map)
				342	{
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	343	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	344	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
				345
				346	spin_lock_bh(&map->lock);
				347	if (map->prog == prog) {
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	348	WARN_ON(prog->aux->cgroup_storage[stype] != _map);
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	349	map->prog = NULL;
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	350	prog->aux->cgroup_storage[stype] = NULL;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	351	}
				352	spin_unlock_bh(&map->lock);
				353	}
				354
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	355	static size_t bpf_cgroup_storage_calculate_size(struct bpf_map map, u32 pages)
				356	{
				357	size_t size;
				358
				359	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
				360	size = sizeof(struct bpf_storage_buffer) + map->value_size;
				361	*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				362	PAGE_SIZE) >> PAGE_SHIFT;
				363	} else {
				364	size = map->value_size;
				365	pages = round_up(round_up(size, 8) num_possible_cpus(),
				366	PAGE_SIZE) >> PAGE_SHIFT;
				367	}
				368
				369	return size;
				370	}
				371
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	372	struct bpf_cgroup_storage bpf_cgroup_storage_alloc(struct bpf_prog prog,
				373	enum bpf_cgroup_storage_type stype)
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	374	{
				375	struct bpf_cgroup_storage *storage;
				376	struct bpf_map *map;
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	377	gfp_t flags;
				378	size_t size;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	379	u32 pages;
				380
Roman Gushchin	8bad74f	2018-09-28 14:45:36 +0000	[diff] [blame]	381	map = prog->aux->cgroup_storage[stype];
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	382	if (!map)
				383	return NULL;
				384
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	385	size = bpf_cgroup_storage_calculate_size(map, &pages);
				386
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	387	if (bpf_map_charge_memlock(map, pages))
				388	return ERR_PTR(-EPERM);
				389
				390	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
				391	__GFP_ZERO \| GFP_USER, map->numa_node);
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	392	if (!storage)
				393	goto enomem;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	394
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	395	flags = __GFP_ZERO \| GFP_USER;
				396
				397	if (stype == BPF_CGROUP_STORAGE_SHARED) {
				398	storage->buf = kmalloc_node(size, flags, map->numa_node);
				399	if (!storage->buf)
				400	goto enomem;
				401	} else {
				402	storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
				403	if (!storage->percpu_buf)
				404	goto enomem;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	405	}
				406
				407	storage->map = (struct bpf_cgroup_storage_map *)map;
				408
				409	return storage;
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	410
				411	enomem:
				412	bpf_map_uncharge_memlock(map, pages);
				413	kfree(storage);
				414	return ERR_PTR(-ENOMEM);
				415	}
				416
				417	static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
				418	{
				419	struct bpf_cgroup_storage *storage =
				420	container_of(rcu, struct bpf_cgroup_storage, rcu);
				421
				422	kfree(storage->buf);
				423	kfree(storage);
				424	}
				425
				426	static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
				427	{
				428	struct bpf_cgroup_storage *storage =
				429	container_of(rcu, struct bpf_cgroup_storage, rcu);
				430
				431	free_percpu(storage->percpu_buf);
				432	kfree(storage);
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	433	}
				434
				435	void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
				436	{
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	437	enum bpf_cgroup_storage_type stype;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	438	struct bpf_map *map;
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	439	u32 pages;
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	440
				441	if (!storage)
				442	return;
				443
				444	map = &storage->map->map;
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	445
				446	bpf_cgroup_storage_calculate_size(map, &pages);
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	447	bpf_map_uncharge_memlock(map, pages);
				448
Roman Gushchin	b741f16	2018-09-28 14:45:43 +0000	[diff] [blame^]	449	stype = cgroup_storage_type(map);
				450	if (stype == BPF_CGROUP_STORAGE_SHARED)
				451	call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
				452	else
				453	call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
Roman Gushchin	de9cbba	2018-08-02 14:27:18 -0700	[diff] [blame]	454	}
				455
				456	void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
				457	struct cgroup *cgroup,
				458	enum bpf_attach_type type)
				459	{
				460	struct bpf_cgroup_storage_map *map;
				461
				462	if (!storage)
				463	return;
				464
				465	storage->key.attach_type = type;
				466	storage->key.cgroup_inode_id = cgroup->kn->id.id;
				467
				468	map = storage->map;
				469
				470	spin_lock_bh(&map->lock);
				471	WARN_ON(cgroup_storage_insert(map, storage));
				472	list_add(&storage->list, &map->list);
				473	spin_unlock_bh(&map->lock);
				474	}
				475
				476	void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
				477	{
				478	struct bpf_cgroup_storage_map *map;
				479	struct rb_root *root;
				480
				481	if (!storage)
				482	return;
				483
				484	map = storage->map;
				485
				486	spin_lock_bh(&map->lock);
				487	root = &map->root;
				488	rb_erase(&storage->node, root);
				489
				490	list_del(&storage->list);
				491	spin_unlock_bh(&map->lock);
				492	}
				493
				494	#endif