Blame - system.c - android.googlesource.com/platform/external/minijail

blob: 63f22d8d525b49c7bcf11851c4f6ed57237bac7a [file] [log] [blame]

Mike Frysinger	50e31fa	2018-01-19 18:59:49 -0500	[diff] [blame]	1	/* Copyright 2017 The Chromium OS Authors. All rights reserved.
				2	* Use of this source code is governed by a BSD-style license that can be
				3	* found in the LICENSE file.
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	4	*/
				5
				6	#include "system.h"
				7
				8	#include <errno.h>
				9	#include <fcntl.h>
Luis Hector Chavez	7132355	2017-09-05 09:17:22 -0700	[diff] [blame]	10	#include <grp.h>
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	11	#include <net/if.h>
Luis Hector Chavez	7132355	2017-09-05 09:17:22 -0700	[diff] [blame]	12	#include <pwd.h>
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	13	#include <stdbool.h>
				14	#include <stdio.h>
				15	#include <string.h>
				16	#include <sys/ioctl.h>
				17	#include <sys/prctl.h>
				18	#include <sys/socket.h>
				19	#include <sys/stat.h>
Luis Hector Chavez	0bacbf8	2018-07-10 20:06:55 -0700	[diff] [blame]	20	#include <sys/statvfs.h>
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	21	#include <unistd.h>
				22
				23	#include "util.h"
				24
				25	#ifdef HAVE_SECUREBITS_H
				26	#include <linux/securebits.h>
				27	#else
				28	#define SECURE_ALL_BITS 0x55
				29	#define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1)
				30	#endif
Jorge Lucangeli Obes	a6eb21a	2017-04-20 10:44:00 -0400	[diff] [blame]	31
				32	#define SECURE_BITS_NO_AMBIENT 0x15
				33	#define SECURE_LOCKS_NO_AMBIENT (SECURE_BITS_NO_AMBIENT << 1)
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	34
				35	/*
				36	* Assert the value of SECURE_ALL_BITS at compile-time.
Jorge Lucangeli Obes	a6eb21a	2017-04-20 10:44:00 -0400	[diff] [blame]	37	* Android devices are currently compiled against 4.4 kernel headers. Kernel 4.3
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	38	* added a new securebit.
				39	* When a new securebit is added, the new SECURE_ALL_BITS mask will return EPERM
				40	* when used on older kernels. The compile-time assert will catch this situation
				41	* at compile time.
				42	*/
Jorge Lucangeli Obes	a6eb21a	2017-04-20 10:44:00 -0400	[diff] [blame]	43	#if defined(__ANDROID__)
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	44	_Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
				45	#endif
				46
/*
 * secure_keep_caps_locked: Reports whether SECBIT_KEEP_CAPS_LOCKED is set.
 *
 * Returns the SECBIT_KEEP_CAPS_LOCKED bit of the value reported by
 * prctl(PR_GET_SECUREBITS) (non-zero when set), or 0 if the prctl(2) call
 * fails.
 */
int secure_keep_caps_locked(void)
{
	const int bits = prctl(PR_GET_SECUREBITS);

	/* A failed query is treated the same as "not locked". */
	if (bits < 0)
		return 0;
	return bits & SECBIT_KEEP_CAPS_LOCKED;
}
				54
/*
 * secure_noroot_set_and_locked: Checks a securebits mask for a locked NOROOT.
 *
 * Returns 1 only when both SECBIT_NOROOT and SECBIT_NOROOT_LOCKED are present
 * in |mask|, 0 otherwise.
 */
int secure_noroot_set_and_locked(uint64_t mask)
{
	const uint64_t want = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;

	return (mask & want) == want;
}
				60
Luis Hector Chavez	ec0a2c1	2017-06-29 20:29:57 -0700	[diff] [blame]	61	int lock_securebits(uint64_t skip_mask)
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	62	{
				63	/*
Jorge Lucangeli Obes	a6eb21a	2017-04-20 10:44:00 -0400	[diff] [blame]	64	* Ambient capabilities can only be raised if they're already present
				65	* in the permitted and inheritable set. Therefore, we don't really
				66	* need to lock the NO_CAP_AMBIENT_RAISE securebit, since we are already
				67	* configuring the permitted and inheritable set.
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	68	*/
Dylan Reid	a7f4fc9	2017-07-13 18:45:23 -0700	[diff] [blame]	69	unsigned long securebits =
Luis Hector Chavez	ec0a2c1	2017-06-29 20:29:57 -0700	[diff] [blame]	70	(SECURE_BITS_NO_AMBIENT \| SECURE_LOCKS_NO_AMBIENT) & ~skip_mask;
				71	if (!securebits) {
Jorge Lucangeli Obes	5423421	2018-04-26 11:52:15 -0400	[diff] [blame]	72	warn("not locking any securebits");
Luis Hector Chavez	ec0a2c1	2017-06-29 20:29:57 -0700	[diff] [blame]	73	return 0;
				74	}
				75	int securebits_ret = prctl(PR_SET_SECUREBITS, securebits);
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	76	if (securebits_ret < 0) {
				77	pwarn("prctl(PR_SET_SECUREBITS) failed");
				78	return -1;
				79	}
				80
				81	return 0;
				82	}
				83
				84	int write_proc_file(pid_t pid, const char content, const char basename)
				85	{
				86	int fd, ret;
				87	size_t sz, len;
				88	ssize_t written;
				89	char filename[32];
				90
				91	sz = sizeof(filename);
				92	ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename);
				93	if (ret < 0 \|\| (size_t)ret >= sz) {
				94	warn("failed to generate %s filename", basename);
				95	return -1;
				96	}
				97
				98	fd = open(filename, O_WRONLY \| O_CLOEXEC);
				99	if (fd < 0) {
				100	pwarn("failed to open '%s'", filename);
				101	return -errno;
				102	}
				103
				104	len = strlen(content);
				105	written = write(fd, content, len);
				106	if (written < 0) {
				107	pwarn("failed to write '%s'", filename);
Jorge Lucangeli Obes	673c89d	2018-10-04 16:08:10 -0400	[diff] [blame^]	108	return -errno;
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	109	}
				110
				111	if ((size_t)written < len) {
				112	warn("failed to write %zu bytes to '%s'", len, filename);
				113	return -1;
				114	}
				115	close(fd);
				116	return 0;
				117	}
				118
				119	/*
				120	* We specifically do not use cap_valid() as that only tells us the last
				121	* valid cap we were compiled against (i.e. what the version of kernel
				122	* headers says). If we run on a different kernel version, then it's not
				123	* uncommon for that to be less (if an older kernel) or more (if a newer
				124	* kernel).
				125	* Normally, we suck up the answer via /proc. On Android, not all processes are
				126	* guaranteed to be able to access '/proc/sys/kernel/cap_last_cap' so we
				127	* programmatically find the value by calling prctl(PR_CAPBSET_READ).
				128	*/
				129	unsigned int get_last_valid_cap(void)
				130	{
				131	unsigned int last_valid_cap = 0;
				132	if (is_android()) {
				133	for (; prctl(PR_CAPBSET_READ, last_valid_cap, 0, 0, 0) >= 0;
				134	++last_valid_cap)
				135	;
				136
				137	/* \|last_valid_cap\| will be the first failing value. */
				138	if (last_valid_cap > 0) {
				139	last_valid_cap--;
				140	}
				141	} else {
				142	const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
				143	FILE *fp = fopen(cap_file, "re");
				144	if (fscanf(fp, "%u", &last_valid_cap) != 1)
				145	pdie("fscanf(%s)", cap_file);
				146	fclose(fp);
				147	}
				148	return last_valid_cap;
				149	}
				150
/*
 * cap_ambient_supported: Probes for ambient capability support.
 *
 * Returns 1 if the PR_CAP_AMBIENT_IS_SET query for CAP_CHOWN does not fail,
 * 0 if prctl(2) reports an error.
 */
int cap_ambient_supported(void)
{
	const int rc =
	    prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0);

	return rc >= 0;
}
				156
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	157	int config_net_loopback(void)
				158	{
				159	const char ifname[] = "lo";
				160	int sock;
				161	struct ifreq ifr;
				162
				163	/* Make sure people don't try to add really long names. */
				164	_Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");
				165
				166	sock = socket(AF_LOCAL, SOCK_DGRAM \| SOCK_CLOEXEC, 0);
				167	if (sock < 0) {
				168	pwarn("socket(AF_LOCAL) failed");
				169	return -1;
				170	}
				171
				172	/*
				173	* Do the equiv of `ip link set up lo`. The kernel will assign
				174	* IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
				175	*/
				176	strcpy(ifr.ifr_name, ifname);
				177	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
				178	pwarn("ioctl(SIOCGIFFLAGS) failed");
				179	return -1;
				180	}
				181
				182	/* The kernel preserves ifr.ifr_name for use. */
				183	ifr.ifr_flags \|= IFF_UP \| IFF_RUNNING;
				184	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
				185	pwarn("ioctl(SIOCSIFFLAGS) failed");
				186	return -1;
				187	}
				188
				189	close(sock);
				190	return 0;
				191	}
				192
				193	int setup_pipe_end(int fds[2], size_t index)
				194	{
				195	if (index > 1)
				196	return -1;
				197
				198	close(fds[1 - index]);
				199	return fds[index];
				200	}
				201
				202	int setup_and_dupe_pipe_end(int fds[2], size_t index, int fd)
				203	{
				204	if (index > 1)
				205	return -1;
				206
				207	close(fds[1 - index]);
				208	/* dup2(2) the corresponding end of the pipe into \|fd\|. */
				209	return dup2(fds[index], fd);
				210	}
				211
				212	int write_pid_to_path(pid_t pid, const char *path)
				213	{
Mike Frysinger	0b5cffa	2017-08-15 18:06:18 -0400	[diff] [blame]	214	FILE *fp = fopen(path, "we");
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	215
				216	if (!fp) {
				217	pwarn("failed to open '%s'", path);
				218	return -errno;
				219	}
				220	if (fprintf(fp, "%d\n", (int)pid) < 0) {
				221	/* fprintf(3) does not set errno on failure. */
				222	warn("fprintf(%s) failed", path);
				223	return -1;
				224	}
				225	if (fclose(fp)) {
				226	pwarn("fclose(%s) failed", path);
				227	return -errno;
				228	}
				229
				230	return 0;
				231	}
				232
				233	/*
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	234	* Create the \|path\| directory and its parents (if need be) with \|mode\|.
				235	* If not \|isdir\|, then \|path\| is actually a file, so the last component
				236	* will not be created.
				237	*/
				238	int mkdir_p(const char *path, mode_t mode, bool isdir)
				239	{
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	240	int rc;
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	241	char *dir = strdup(path);
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	242	if (!dir) {
				243	rc = errno;
				244	pwarn("strdup(%s) failed", path);
				245	return -rc;
				246	}
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	247
				248	/* Starting from the root, work our way out to the end. */
				249	char *p = strchr(dir + 1, '/');
				250	while (p) {
				251	*p = '\0';
				252	if (mkdir(dir, mode) && errno != EEXIST) {
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	253	rc = errno;
				254	pwarn("mkdir(%s, 0%o) failed", dir, mode);
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	255	free(dir);
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	256	return -rc;
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	257	}
				258	*p = '/';
				259	p = strchr(p + 1, '/');
				260	}
				261
				262	/*
				263	* Create the last directory. We still check EEXIST here in case
				264	* of trailing slashes.
				265	*/
				266	free(dir);
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	267	if (isdir && mkdir(path, mode) && errno != EEXIST) {
				268	rc = errno;
				269	pwarn("mkdir(%s, 0%o) failed", path, mode);
				270	return -rc;
				271	}
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	272	return 0;
				273	}
				274
				275	/*
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	276	* setup_mount_destination: Ensures the mount target exists.
				277	* Creates it if needed and possible.
				278	*/
				279	int setup_mount_destination(const char source, const char dest, uid_t uid,
Luis Hector Chavez	0bacbf8	2018-07-10 20:06:55 -0700	[diff] [blame]	280	uid_t gid, bool bind, unsigned long *mnt_flags)
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	281	{
				282	int rc;
				283	struct stat st_buf;
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	284	bool domkdir;
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	285
				286	rc = stat(dest, &st_buf);
				287	if (rc == 0) /* destination exists */
				288	return 0;
				289
				290	/*
				291	* Try to create the destination.
				292	* Either make a directory or touch a file depending on the source type.
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	293	*
				294	* If the source isn't an absolute path, assume it is a filesystem type
				295	* such as "tmpfs" and create a directory to mount it on. The dest will
				296	* be something like "none" or "proc" which we shouldn't be checking.
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	297	*/
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	298	if (source[0] == '/') {
				299	/* The source is an absolute path -- it better exist! */
				300	rc = stat(source, &st_buf);
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	301	if (rc) {
				302	rc = errno;
				303	pwarn("stat(%s) failed", source);
				304	return -rc;
				305	}
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	306
				307	/*
				308	* If bind mounting, we only create a directory if the source
				309	* is a directory, else we always bind mount it as a file to
				310	* support device nodes, sockets, etc...
				311	*
				312	* For all other mounts, we assume a block/char source is
				313	* going to want a directory to mount to. If the source is
				314	* something else (e.g. a fifo or socket), this probably will
				315	* not do the right thing, but we'll fail later on when we try
				316	* to mount(), so shouldn't be a big deal.
				317	*/
				318	domkdir = S_ISDIR(st_buf.st_mode) \|\|
				319	(!bind && (S_ISBLK(st_buf.st_mode) \|\|
				320	S_ISCHR(st_buf.st_mode)));
Luis Hector Chavez	0bacbf8	2018-07-10 20:06:55 -0700	[diff] [blame]	321
				322	/* If bind mounting, also grab the mount flags of the source. */
				323	if (bind && mnt_flags) {
				324	struct statvfs stvfs_buf;
				325	rc = statvfs(source, &stvfs_buf);
				326	if (rc) {
				327	rc = errno;
				328	pwarn(
				329	"failed to look up mount flags: source=%s",
				330	source);
				331	return -rc;
				332	}
				333	*mnt_flags = stvfs_buf.f_flag;
				334	}
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	335	} else {
				336	/* The source is a relative path -- assume it's a pseudo fs. */
				337
				338	/* Disallow relative bind mounts. */
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	339	if (bind) {
				340	warn("relative bind-mounts are not allowed: source=%s",
				341	source);
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	342	return -EINVAL;
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	343	}
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	344
				345	domkdir = true;
				346	}
				347
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	348	/*
				349	* Now that we know what we want to do, do it!
				350	* We always create the intermediate dirs and the final path with 0755
				351	* perms and root/root ownership. This shouldn't be a problem because
				352	* the actual mount will set those perms/ownership on the mount point
				353	* which is all people should need to access it.
				354	*/
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	355	rc = mkdir_p(dest, 0755, domkdir);
				356	if (rc)
				357	return rc;
Mike Frysinger	5fdba4e	2018-01-17 15:39:48 -0500	[diff] [blame]	358	if (!domkdir) {
Mike Frysinger	eaab420	2017-08-14 14:57:21 -0400	[diff] [blame]	359	int fd = open(dest, O_RDWR \| O_CREAT \| O_CLOEXEC, 0700);
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	360	if (fd < 0) {
				361	rc = errno;
				362	pwarn("open(%s) failed", dest);
				363	return -rc;
				364	}
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	365	close(fd);
				366	}
yusukes	059e0bd	2018-03-05 10:22:16 -0800	[diff] [blame]	367	if (chown(dest, uid, gid)) {
				368	rc = errno;
				369	pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
				370	return -rc;
				371	}
yusukes	76a9d74	2018-03-05 10:20:22 -0800	[diff] [blame]	372	return 0;
Jorge Lucangeli Obes	0b20877	2017-04-19 14:15:46 -0400	[diff] [blame]	373	}
Luis Hector Chavez	7132355	2017-09-05 09:17:22 -0700	[diff] [blame]	374
				375	/*
				376	* lookup_user: Gets the uid/gid for the given username.
				377	*/
				378	int lookup_user(const char user, uid_t uid, gid_t *gid)
				379	{
				380	char *buf = NULL;
				381	struct passwd pw;
				382	struct passwd *ppw = NULL;
				383	ssize_t sz = sysconf(_SC_GETPW_R_SIZE_MAX);
				384	if (sz == -1)
				385	sz = 65536; /* your guess is as good as mine... */
				386
				387	/*
				388	* sysconf(_SC_GETPW_R_SIZE_MAX), under glibc, is documented to return
				389	* the maximum needed size of the buffer, so we don't have to search.
				390	*/
				391	buf = malloc(sz);
				392	if (!buf)
				393	return -ENOMEM;
				394	getpwnam_r(user, &pw, buf, sz, &ppw);
				395	/*
				396	* We're safe to free the buffer here. The strings inside \|pw\| point
				397	* inside \|buf\|, but we don't use any of them; this leaves the pointers
				398	* dangling but it's safe. \|ppw\| points at \|pw\| if getpwnam_r(3)
				399	* succeeded.
				400	*/
				401	free(buf);
				402	/* getpwnam_r(3) does not set errno when \|ppw\| is NULL. */
				403	if (!ppw)
				404	return -1;
				405
				406	*uid = ppw->pw_uid;
				407	*gid = ppw->pw_gid;
				408	return 0;
				409	}
				410
				411	/*
				412	* lookup_group: Gets the gid for the given group name.
				413	*/
				414	int lookup_group(const char group, gid_t gid)
				415	{
				416	char *buf = NULL;
				417	struct group gr;
				418	struct group *pgr = NULL;
				419	ssize_t sz = sysconf(_SC_GETGR_R_SIZE_MAX);
				420	if (sz == -1)
				421	sz = 65536; /* and mine is as good as yours, really */
				422
				423	/*
				424	* sysconf(_SC_GETGR_R_SIZE_MAX), under glibc, is documented to return
				425	* the maximum needed size of the buffer, so we don't have to search.
				426	*/
				427	buf = malloc(sz);
				428	if (!buf)
				429	return -ENOMEM;
				430	getgrnam_r(group, &gr, buf, sz, &pgr);
				431	/*
				432	* We're safe to free the buffer here. The strings inside gr point
				433	* inside buf, but we don't use any of them; this leaves the pointers
				434	* dangling but it's safe. pgr points at gr if getgrnam_r succeeded.
				435	*/
				436	free(buf);
				437	/* getgrnam_r(3) does not set errno when \|pgr\| is NULL. */
				438	if (!pgr)
				439	return -1;
				440
				441	*gid = pgr->gr_gid;
				442	return 0;
				443	}