minijail0: move ld_preload communication to a pipe
Moves minijail0 communication over to using a file descriptor instead
of packing it in an environment variable. The primary reasoning is to
allow seccomp filter policies to be passed to a child process.
However, this will make it easier for minijail behavior to stay
consistent across minijail_run and minijail_enter if serialization can
be made more generic. For instance, -g does not properly traverse a
preload instead relying on inheritance which is inconsistent depending
on pidns usage.
BUG=chromium-os:19459
TEST=tested -[pvrcu] with /bin/cat /proc/self/status
Change-Id: Id1845b86517ce0a6a9d6bcd85f700ea459d7c8f4
Reviewed-on: http://gerrit.chromium.org/gerrit/7890
Reviewed-by: Elly Jones <ellyjones@chromium.org>
Tested-by: Will Drewry <wad@chromium.org>
diff --git a/libminijail.c b/libminijail.c
index 08482e8..a9ea1b6 100644
--- a/libminijail.c
+++ b/libminijail.c
@@ -14,6 +14,7 @@
#include <pwd.h>
#include <sched.h>
#include <signal.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -434,6 +435,27 @@
_exit(WEXITSTATUS(init_exitstatus));
}
+static int write_cmd(int fd, const char *fmt, ...) {
+ char cmd[MINIJAIL_MAX_ARG_LINE];
+ ssize_t written;
+ int r;
+ va_list ap;
+
+ va_start(ap, fmt);
+ r = vsnprintf(cmd, sizeof(cmd), fmt, ap);
+ va_end(ap);
+
+ if (r <= 0)
+ return -EFAULT;
+ if ((size_t) r >= sizeof(cmd))
+ return -E2BIG;
+
+ written = write(fd, cmd, r);
+ if (written != r)
+ return -EFAULT;
+ return 0;
+}
+
/** @brief Move any commands that need to be done post-exec into an environment
* variable
* @param j Jail to move commands from.
@@ -442,62 +464,35 @@
* and adds them to the environment; they will be deserialized later (see
* __minijail_preloaded) and executed inside the execve()'d process.
*/
-static int move_commands_to_env(struct minijail *j) {
- const int kEnvBufSize = 256;
- const char *ptrace = j->flags.ptrace ? "ptrace " : "";
- const char *seccomp = j->flags.seccomp ? "seccomp " : "";
- char setuid[64] = "";
- char caps[32] = "";
- char *newenv;
- char *oldenv;
- char *envbuf = malloc(kEnvBufSize);
- int r;
-
- if (!envbuf)
- return -ENOMEM;
-
- if (j->flags.caps)
- snprintf(caps, sizeof(caps), "caps=%" PRIx64 " ", j->caps);
-
- if (j->flags.uid && j->flags.caps) {
- snprintf(setuid, sizeof(setuid), "uid=%d ", j->uid);
- j->flags.uid = 0;
- }
-
- j->flags.caps = 0;
- j->flags.ptrace = 0;
- j->flags.seccomp = 0;
+static int send_commands_to_child(struct minijail *j, int fd) {
+ if (j->flags.caps && write_cmd(fd, "caps=%" PRIx64 "\n", j->caps))
+ return -EFAULT;
+ if (j->flags.uid && write_cmd(fd, "uid=%d\n", j->uid))
+ return -EFAULT;
+ if (j->flags.ptrace && write_cmd(fd, "ptrace\n"))
+ return -EFAULT;
+ if (j->flags.seccomp && write_cmd(fd, "seccomp\n"))
+ return -EFAULT;
if (j->flags.seccomp_filter)
warn("TODO(wad) seccomp_filter is installed in the parent which "
"requires overly permissive rules for execve(2)ing.");
- r = snprintf(envbuf, kEnvBufSize, "%s%s%s%s", setuid, ptrace, seccomp, caps);
- if (!r) {
- /* No commands generated, so no preload needed :) */
- free(envbuf);
- return 0;
- }
- if (r == kEnvBufSize) {
- free(envbuf);
- return -E2BIG;
- }
+ return write_cmd(fd, "eom\n");
+}
- oldenv = getenv(kLdPreloadEnvVar) ? : "";
- newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
- if (!newenv) {
- free(envbuf);
+static int setup_preload(void) {
+ char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
+ char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
+ if (!newenv)
return -ENOMEM;
- }
/* Only insert a separating space if we have something to separate... */
sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "", PRELOADPATH);
/* setenv() makes a copy of the string we give it */
setenv(kLdPreloadEnvVar, newenv, 1);
- setenv(kCommandEnvVar, envbuf, 1);
free(newenv);
- free(envbuf);
return 0;
}
@@ -505,6 +500,8 @@
unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
char *oldenv, *oldenv_copy = NULL;
pid_t r;
+ int pipe_fds[2];
+ char fd_buf[11];
oldenv = getenv(kLdPreloadEnvVar);
if (oldenv) {
@@ -512,14 +509,20 @@
if (!oldenv_copy)
return -ENOMEM;
}
-
- r = move_commands_to_env(j);
- if (r) {
- /* No environment variable is modified if move_commands_to_env returns
- * a non-zero value. */
- free(oldenv_copy);
+ r = setup_preload();
+ if (r)
return r;
- }
+
+ /* Before we fork(2) and execve(2) the child process, we need to open
+ * a pipe(2) to send the minijail configuration over.
+ */
+ r = pipe(pipe_fds);
+ if (r)
+ return r;
+ r = snprintf(fd_buf, sizeof(fd_buf), "%d", pipe_fds[0]);
+ if (r <= 0)
+ return -EINVAL;
+ setenv(kFdEnvVar, fd_buf, 1);
r = syscall(SYS_clone, pidns | SIGCHLD, NULL);
if (r > 0) {
@@ -529,8 +532,15 @@
} else {
unsetenv(kLdPreloadEnvVar);
}
- unsetenv(kCommandEnvVar);
+ unsetenv(kFdEnvVar);
j->initpid = r;
+ close(pipe_fds[0]);
+ r = send_commands_to_child(j, pipe_fds[1]);
+ close(pipe_fds[1]);
+ if (r) {
+ kill(j->initpid, SIGKILL);
+ die("failed to send marshalled minijail");
+ }
return 0;
}
@@ -539,6 +549,15 @@
if (r < 0)
return r;
+ j->flags.uid = 0;
+ /* TODO(wad) gid should be sent over preload and not done in advance.
+ * j->flags.gid = 0;
+ */
+ j->flags.usergroups = 0;
+ j->flags.caps = 0;
+ j->flags.ptrace = 0;
+ j->flags.seccomp = 0;
+
j->flags.pids = 0;
/* Jail this process and its descendants... */
@@ -555,6 +574,7 @@
init(r); /* never returns */
}
+
/* If we aren't pid-namespaced:
* calling process
* -> execve()-ing process