blob: 46df1491969a14da0902324c36779554637516e8 [file] [log] [blame]
/* Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
Elly Jonescd7a9042011-07-22 13:56:51 -04005
6#define _BSD_SOURCE
7#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <grp.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/capability.h>
#include <linux/securebits.h>
#include <pwd.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <sys/capability.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <syslog.h>
#include <unistd.h>

#include "libminijail.h"
#include "libsyscalls.h"
#include "libminijail-private.h"
32
/* Fallback values for the seccomp-filter prctl(2) interface, defined here only
 * when the system headers do not already provide PR_SET_SECCOMP_FILTER. */
#if !defined(PR_SET_SECCOMP_FILTER)
# define PR_SECCOMP_FILTER_SYSCALL 0
# define PR_SECCOMP_FILTER_EVENT 1
# define PR_GET_SECCOMP_FILTER 35
# define PR_SET_SECCOMP_FILTER 36
# define PR_CLEAR_SECCOMP_FILTER 37
#endif
41
/* die(fmt, ...): log a "libminijail: "-prefixed message to syslog at LOG_ERR
 * and then abort() the process. Never returns. */
#define die(_msg, ...) \
  do { \
    syslog(LOG_ERR, "libminijail: " _msg, ## __VA_ARGS__); \
    abort(); \
  } while (0)

/* pdie(fmt, ...): like die(), but appends ": " and strerror(errno) to the
 * message. */
#define pdie(_msg, ...) \
  die(_msg ": %s", ## __VA_ARGS__, strerror(errno))

/* warn(fmt, ...): log a "libminijail: "-prefixed message to syslog at
 * LOG_WARNING and keep going. */
#define warn(_msg, ...) \
  syslog(LOG_WARNING, "libminijail: " _msg, ## __VA_ARGS__)
Elly Jonescd7a9042011-07-22 13:56:51 -040052
53struct minijail *minijail_new(void) {
54 struct minijail *j = malloc(sizeof(*j));
55 if (j)
56 memset(j, 0, sizeof(*j));
57 return j;
58}
59
60void minijail_change_uid(struct minijail *j, uid_t uid) {
61 if (uid == 0)
62 die("useless change to uid 0");
63 j->uid = uid;
64 j->flags.uid = 1;
65}
66
67void minijail_change_gid(struct minijail *j, gid_t gid) {
68 if (gid == 0)
69 die("useless change to gid 0");
70 j->gid = gid;
71 j->flags.gid = 1;
72}
73
74int minijail_change_user(struct minijail *j, const char *user) {
75 /* In principle this should use getpwnam(), but:
76 * 1) getpwnam_r() isn't actually reentrant anyway, since it uses a
77 * statically-allocated file descriptor internally
78 * 2) fgetpwnam() (by analogy with fgetpwent) would solve (1) except that it
79 * doesn't exist
80 * 3) sysconf() (see getpwnam_r(3)) is allowed to return a size that is not
81 * large enough, which means having to loop on growing the buffer we pass
82 * in
83 */
84 struct passwd *pw = getpwnam(user);
85 if (!pw)
86 return errno;
87 minijail_change_uid(j, pw->pw_uid);
Will Drewry2ddaad02011-09-16 11:36:08 -050088 j->user = strdup(user);
89 if (!j->user)
90 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -040091 j->usergid = pw->pw_gid;
92 return 0;
93}
94
/* Looks up |group| in the group database and configures the jail to switch to
 * its gid via minijail_change_gid().
 *
 * Returns 0 on success or a positive errno value if the lookup fails (ENOENT
 * if the group simply does not exist). Aborts (via minijail_change_gid) if the
 * group's gid is 0.
 */
int minijail_change_group(struct minijail *j, const char *group) {
  /* In principle this should use getgrnam_r(), but:
   * 1) getgrnam_r() isn't actually reentrant anyway, since it uses a
   *    statically-allocated file descriptor internally
   * 2) fgetgrnam() (by analogy with fgetgrent) would solve (1) except that it
   *    doesn't exist
   * 3) sysconf() (see getgrnam_r(3)) is allowed to return a size that is not
   *    large enough, which means having to loop on growing the buffer we pass
   *    in
   */
  struct group *gr;

  /* getgrnam() leaves errno untouched when the group is merely absent, so
   * clear it first and map "not found" to ENOENT instead of returning 0. */
  errno = 0;
  gr = getgrnam(group);
  if (!gr)
    return errno ? errno : ENOENT;

  minijail_change_gid(j, gr->gr_gid);
  return 0;
}
111
112void minijail_use_seccomp(struct minijail *j) {
113 j->flags.seccomp = 1;
114}
115
Will Drewry32ac9f52011-08-18 21:36:27 -0500116void minijail_use_seccomp_filter(struct minijail *j) {
117 j->flags.seccomp_filter = 1;
118}
119
Elly Jonescd7a9042011-07-22 13:56:51 -0400120void minijail_use_caps(struct minijail *j, uint64_t capmask) {
121 j->caps = capmask;
122 j->flags.caps = 1;
123}
124
125void minijail_namespace_vfs(struct minijail *j) {
126 j->flags.vfs = 1;
127}
128
129void minijail_namespace_pids(struct minijail *j) {
130 j->flags.pids = 1;
131}
132
133void minijail_remount_readonly(struct minijail *j) {
134 j->flags.vfs = 1;
135 j->flags.readonly = 1;
136}
137
138void minijail_inherit_usergroups(struct minijail *j) {
139 j->flags.usergroups = 1;
140}
141
142void minijail_disable_ptrace(struct minijail *j) {
143 j->flags.ptrace = 1;
144}
145
Will Drewry32ac9f52011-08-18 21:36:27 -0500146int minijail_add_seccomp_filter(struct minijail *j, int nr,
147 const char *filter) {
148 struct seccomp_filter *sf;
149 if (!filter || nr < 0)
150 return -EINVAL;
151
152 sf = malloc(sizeof(*sf));
153 if (!sf)
154 return -ENOMEM;
155 sf->nr = nr;
156 sf->filter = strndup(filter, MINIJAIL_MAX_SECCOMP_FILTER_LINE);
157 if (!sf->filter) {
158 free(sf);
159 return -ENOMEM;
160 }
161
162 if (!j->filters) {
163 j->filters = sf;
164 sf->next = sf;
165 sf->prev = sf;
166 return 0;
167 }
168 sf->next = j->filters;
169 sf->prev = j->filters->prev;
170 sf->prev->next = sf;
171 j->filters->prev = sf;
172 return 0;
173}
174
175int minijail_lookup_syscall(const char *name) {
176 const struct syscall_entry *entry = syscall_table;
177 for (; entry->name && entry->nr >= 0; ++entry)
178 if (!strcmp(entry->name, name))
179 return entry->nr;
180 return -1;
181}
182
/* Trims |s| in place: skips leading blanks (space/tab) and cuts off trailing
 * blanks and newlines by writing a NUL terminator. Returns a pointer to the
 * first non-blank character inside |s|, which is an empty string when |s| is
 * empty or all whitespace.
 */
static char *strip(char *s) {
  char *end;

  /* <ctype.h> functions need an unsigned char value; plain char may be
   * negative. */
  while (*s && isblank((unsigned char)*s))
    s++;

  /* |end| points one past the last kept character and never moves before
   * |s|, so empty and all-whitespace input cannot run off the front of the
   * buffer. */
  end = s + strlen(s);
  while (end > s && (isblank((unsigned char)end[-1]) || end[-1] == '\n'))
    end--;
  *end = '\0';
  return s;
}
193
194void minijail_parse_seccomp_filters(struct minijail *j, const char *path) {
195 FILE *file = fopen(path, "r");
196 char line[MINIJAIL_MAX_SECCOMP_FILTER_LINE];
197 int count = 1;
198 if (!file)
199 pdie("failed to open seccomp filters file");
200
201 /* Format is simple:
202 * syscall_name<COLON><FILTER STRING>[\n|EOF]
203 * #...comment...
204 * <empty line?
205 */
206 while (fgets(line, sizeof(line), file)) {
207 char *filter = line;
208 char *name = strsep(&filter, ":");
209 char *name_end = NULL;
210 int nr = -1;
211
212 if (!name)
213 die("invalid filter on line %d", count);
214
215 name = strip(name);
216
217 if (!filter) {
218 if (strlen(name))
219 die("invalid filter on line %d", count);
220 /* Allow empty lines */
221 continue;
222 }
223
224 /* Allow comment lines */
225 if (*name == '#')
226 continue;
227
228 filter = strip(filter);
229
230 /* Take direct syscall numbers */
231 nr = strtol(name, &name_end, 0);
232 /* Or fail-over to using names */
233 if (*name_end != '\0')
234 nr = minijail_lookup_syscall(name);
235 if (nr < 0)
236 die("syscall '%s' unknown", name);
237
238 if (minijail_add_seccomp_filter(j, nr, filter))
239 pdie("failed to add filter for syscall '%s'", name);
240 }
241 fclose(file);
242}
243
Will Drewry2ddaad02011-09-16 11:36:08 -0500244size_t minijail_size(const struct minijail *j) {
245 size_t bytes = sizeof(*j);
246 if (j->user)
247 bytes += strlen(j->user) + 1;
248 /* TODO(wad) if (seccomp_filter) */
249 return bytes;
250}
251
252void minijail_preenter(struct minijail *j) {
253 /* Strip out options which are minijail_run() only. */
254 j->flags.pids = 0;
255 j->flags.vfs = 0;
256 j->flags.readonly = 0;
257}
258
259int minijail_marshal(const struct minijail *j, char *buf, size_t available) {
260 size_t total = sizeof(*j);
261 if (available < total)
262 return -ENOSPC;
263 available -= total;
264 memcpy(buf, (void *) j, sizeof(*j));
265 if (j->user) {
266 size_t len = strlen(j->user) + 1;
267 if (available < len)
268 return -ENOSPC;
269 memcpy(buf + total, j->user, len);
270 available -= len;
271 total += len;
272 }
273 return 0;
274}
275
276int minijail_unmarshal(struct minijail *j, char *serialized, size_t length) {
277 if (length < sizeof(*j))
278 return -EINVAL;
279 memcpy((void *) j, serialized, sizeof(*j));
280 serialized += sizeof(*j);
281 length -= sizeof(*j);
282 if (j->user) { /* stale pointer */
283 if (!length)
284 return -EINVAL;
285 j->user = strndup(serialized, length);
286 length -= strlen(j->user) + 1;
287 }
288 return 0;
289}
290
291void minijail_prefork(struct minijail *j) {
292 j->flags.uid = 0;
293 j->flags.caps = 0;
294 j->flags.seccomp = 0;
295 j->flags.usergroups = 0;
296 j->flags.ptrace = 0;
297 j->flags.seccomp_filter = 0;
298 if (j->user)
299 free(j->user);
300 j->user = NULL;
301}
302
/* Replaces the /proc mount with a fresh read-only, nodev/noexec/nosuid proc
 * mount. Returns 0 on success or the errno of the failing call. */
static int remount_readonly(void) {
  static const char proc_path[] = "/proc";
  const unsigned int safe_flags = MS_NODEV | MS_NOEXEC | MS_NOSUID;

  /* Right now, we're holding a reference to our parent's old mount of /proc
   * in our namespace, which means using MS_REMOUNT here would mutate our
   * parent's mount as well, even though we're in a VFS namespace (!).
   * Instead, remove their mount from our namespace and make our own. */
  if (umount(proc_path) != 0)
    return errno;
  if (mount("", proc_path, "proc", safe_flags | MS_RDONLY, "") != 0)
    return errno;
  return 0;
}
316
317static void drop_caps(const struct minijail *j) {
318 cap_t caps = cap_get_proc();
319 cap_value_t raise_flag[1];
320 unsigned int i;
321 if (!caps)
322 die("can't get process caps");
323 if (cap_clear_flag(caps, CAP_INHERITABLE))
324 die("can't clear inheritable caps");
325 if (cap_clear_flag(caps, CAP_EFFECTIVE))
326 die("can't clear effective caps");
327 if (cap_clear_flag(caps, CAP_PERMITTED))
328 die("can't clear permitted caps");
329 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
330 if (i != CAP_SETPCAP && !(j->caps & (1 << i)))
331 continue;
332 raise_flag[0] = i;
333 if (cap_set_flag(caps, CAP_EFFECTIVE, 1, raise_flag, CAP_SET))
334 die("can't add effective cap");
335 if (cap_set_flag(caps, CAP_PERMITTED, 1, raise_flag, CAP_SET))
336 die("can't add permitted cap");
337 if (cap_set_flag(caps, CAP_INHERITABLE, 1, raise_flag, CAP_SET))
338 die("can't add inheritable cap");
339 }
340 if (cap_set_proc(caps))
341 die("can't apply cleaned capset");
342 cap_free(caps);
343 for (i = 0; i < sizeof(j->caps) * 8 && cap_valid((int)i); ++i) {
344 if (j->caps & (1 << i))
345 continue;
346 if (prctl(PR_CAPBSET_DROP, i))
347 pdie("prctl(PR_CAPBSET_DROP)");
348 }
349}
350
Will Drewry32ac9f52011-08-18 21:36:27 -0500351static int setup_seccomp_filters(const struct minijail *j) {
352 const struct seccomp_filter *sf = j->filters;
353 int ret = 0;
354 int broaden = 0;
355
356 /* No filters installed isn't necessarily an error. */
357 if (!sf)
358 return ret;
359
360 do {
361 errno = 0;
362 ret = prctl(PR_SET_SECCOMP_FILTER, PR_SECCOMP_FILTER_SYSCALL,
363 sf->nr, broaden ? "1" : sf->filter);
364 if (ret) {
365 switch (errno) {
366 case ENOSYS:
367 /* TODO(wad) make this a config option */
368 if (broaden)
369 die("CONFIG_SECCOMP_FILTER is not supported by your kernel");
370 warn("missing CONFIG_FTRACE_SYSCALLS; relaxing the filter for %d",
371 sf->nr);
372 broaden = 1;
373 continue;
374 case E2BIG:
375 warn("seccomp filter too long: %d", sf->nr);
376 pdie("filter too long");
377 case ENOSPC:
378 pdie("too many seccomp filters");
379 case EPERM:
380 warn("syscall filter disallowed for %d", sf->nr);
381 pdie("failed to install seccomp filter");
382 case EINVAL:
383 warn("seccomp filter or call method is invalid. %d:'%s'",
384 sf->nr, sf->filter);
385 default:
386 pdie("failed to install seccomp filter");
387 }
388 }
389 sf = sf->next;
390 broaden = 0;
391 } while (sf != j->filters);
392 return ret;
393}
394
Elly Jonescd7a9042011-07-22 13:56:51 -0400395void minijail_enter(const struct minijail *j) {
Will Drewry32ac9f52011-08-18 21:36:27 -0500396 int ret;
Elly Jonescd7a9042011-07-22 13:56:51 -0400397 if (j->flags.pids)
398 die("tried to enter a pid-namespaced jail; try minijail_run()?");
399
Will Drewry32ac9f52011-08-18 21:36:27 -0500400 ret = setup_seccomp_filters(j);
401 if (j->flags.seccomp_filter && ret)
402 die("failed to configure seccomp filters");
403
Elly Jonescd7a9042011-07-22 13:56:51 -0400404 if (j->flags.usergroups && !j->user)
405 die("usergroup inheritance without username");
406
407 /* We can't recover from failures if we've dropped privileges partially,
408 * so we don't even try. If any of our operations fail, we abort() the
409 * entire process. */
410 if (j->flags.vfs && unshare(CLONE_NEWNS))
411 pdie("unshare");
412
413 if (j->flags.readonly && remount_readonly())
414 pdie("remount");
415
416 if (j->flags.caps) {
417 /* POSIX capabilities are a bit tricky. If we drop our capability to change
418 * uids, our attempt to use setuid() below will fail. Hang on to root caps
419 * across setuid(), then lock securebits. */
420 if (prctl(PR_SET_KEEPCAPS, 1))
421 pdie("prctl(PR_SET_KEEPCAPS)");
422 if (prctl(PR_SET_SECUREBITS, SECURE_ALL_BITS | SECURE_ALL_LOCKS))
423 pdie("prctl(PR_SET_SECUREBITS)");
424 }
425
Will Drewry32ac9f52011-08-18 21:36:27 -0500426 if (j->flags.usergroups && initgroups(j->user, j->usergid)) {
Elly Jonescd7a9042011-07-22 13:56:51 -0400427 pdie("initgroups");
Will Drewry32ac9f52011-08-18 21:36:27 -0500428 } else if (!j->flags.usergroups && setgroups(0, NULL)) {
Elly Jonescd7a9042011-07-22 13:56:51 -0400429 pdie("setgroups");
Will Drewry32ac9f52011-08-18 21:36:27 -0500430 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400431
432 if (j->flags.gid && setresgid(j->gid, j->gid, j->gid))
433 pdie("setresgid");
434
435 if (j->flags.uid && setresuid(j->uid, j->uid, j->uid))
436 pdie("setresuid");
437
438 if (j->flags.caps)
439 drop_caps(j);
440
441 /* seccomp has to come last since it cuts off all the other
442 * privilege-dropping syscalls :) */
Will Drewry32ac9f52011-08-18 21:36:27 -0500443 if (j->flags.seccomp_filter && prctl(PR_SET_SECCOMP, 13))
444 pdie("prctl(PR_SET_SECCOMP, 13)");
445
Elly Jonescd7a9042011-07-22 13:56:51 -0400446 if (j->flags.seccomp && prctl(PR_SET_SECCOMP, 1))
447 pdie("prctl(PR_SET_SECCOMP)");
448}
449
450static int init_exitstatus = 0;
451
452static void init_term(int __attribute__((unused)) sig) {
453 _exit(init_exitstatus);
454}
455
456static int init(pid_t rootpid) {
457 pid_t pid;
458 int status;
459 signal(SIGTERM, init_term); /* so that we exit with the right status */
460 while ((pid = wait(&status)) > 0) {
461 /* This loop will only end when either there are no processes left inside
462 * our pid namespace or we get a signal. */
463 if (pid == rootpid)
464 init_exitstatus = status;
465 }
466 if (!WIFEXITED(init_exitstatus))
467 _exit(MINIJAIL_ERR_INIT);
468 _exit(WEXITSTATUS(init_exitstatus));
469}
470
Will Drewry2f54b6a2011-09-16 13:45:31 -0500471static int write_cmd(int fd, const char *fmt, ...) {
472 char cmd[MINIJAIL_MAX_ARG_LINE];
473 ssize_t written;
474 int r;
475 va_list ap;
476
477 va_start(ap, fmt);
478 r = vsnprintf(cmd, sizeof(cmd), fmt, ap);
479 va_end(ap);
480
481 if (r <= 0)
482 return -EFAULT;
483 if ((size_t) r >= sizeof(cmd))
484 return -E2BIG;
485
486 written = write(fd, cmd, r);
487 if (written != r)
488 return -EFAULT;
489 return 0;
490}
491
Elly Jonescd7a9042011-07-22 13:56:51 -0400492/** @brief Move any commands that need to be done post-exec into an environment
493 * variable
494 * @param j Jail to move commands from.
495 *
496 * Serializes post-exec() commands into a string, removes them from the jail,
497 * and adds them to the environment; they will be deserialized later (see
498 * __minijail_preloaded) and executed inside the execve()'d process.
499 */
Will Drewry2f54b6a2011-09-16 13:45:31 -0500500static int send_commands_to_child(struct minijail *j, int fd) {
501 if (j->flags.caps && write_cmd(fd, "caps=%" PRIx64 "\n", j->caps))
502 return -EFAULT;
503 if (j->flags.uid && write_cmd(fd, "uid=%d\n", j->uid))
504 return -EFAULT;
505 if (j->flags.ptrace && write_cmd(fd, "ptrace\n"))
506 return -EFAULT;
507 if (j->flags.seccomp && write_cmd(fd, "seccomp\n"))
508 return -EFAULT;
Elly Jonescd7a9042011-07-22 13:56:51 -0400509
Will Drewry32ac9f52011-08-18 21:36:27 -0500510 if (j->flags.seccomp_filter)
511 warn("TODO(wad) seccomp_filter is installed in the parent which "
512 "requires overly permissive rules for execve(2)ing.");
513
Will Drewry2f54b6a2011-09-16 13:45:31 -0500514 return write_cmd(fd, "eom\n");
515}
Elly Jonescd7a9042011-07-22 13:56:51 -0400516
Will Drewry2f54b6a2011-09-16 13:45:31 -0500517static int setup_preload(void) {
518 char *oldenv = getenv(kLdPreloadEnvVar) ? : "";
519 char *newenv = malloc(strlen(oldenv) + 2 + strlen(PRELOADPATH));
520 if (!newenv)
Elly Jonescd7a9042011-07-22 13:56:51 -0400521 return -ENOMEM;
Elly Jonescd7a9042011-07-22 13:56:51 -0400522
523 /* Only insert a separating space if we have something to separate... */
524 sprintf(newenv, "%s%s%s", oldenv, strlen(oldenv) ? " " : "", PRELOADPATH);
525
526 /* setenv() makes a copy of the string we give it */
Ben Chan541c7e52011-08-26 14:55:53 -0700527 setenv(kLdPreloadEnvVar, newenv, 1);
Elly Jonescd7a9042011-07-22 13:56:51 -0400528 free(newenv);
Elly Jonescd7a9042011-07-22 13:56:51 -0400529 return 0;
530}
531
532int minijail_run(struct minijail *j, const char *filename, char *const argv[]) {
533 unsigned int pidns = j->flags.pids ? CLONE_NEWPID : 0;
Ben Chan541c7e52011-08-26 14:55:53 -0700534 char *oldenv, *oldenv_copy = NULL;
Elly Jonescd7a9042011-07-22 13:56:51 -0400535 pid_t r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500536 int pipe_fds[2];
537 char fd_buf[11];
Ben Chan541c7e52011-08-26 14:55:53 -0700538
539 oldenv = getenv(kLdPreloadEnvVar);
540 if (oldenv) {
541 oldenv_copy = strdup(oldenv);
542 if (!oldenv_copy)
543 return -ENOMEM;
544 }
Will Drewry2f54b6a2011-09-16 13:45:31 -0500545 r = setup_preload();
546 if (r)
Elly Jonescd7a9042011-07-22 13:56:51 -0400547 return r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500548
549 /* Before we fork(2) and execve(2) the child process, we need to open
550 * a pipe(2) to send the minijail configuration over.
551 */
552 r = pipe(pipe_fds);
553 if (r)
554 return r;
555 r = snprintf(fd_buf, sizeof(fd_buf), "%d", pipe_fds[0]);
556 if (r <= 0)
557 return -EINVAL;
558 setenv(kFdEnvVar, fd_buf, 1);
Elly Jonescd7a9042011-07-22 13:56:51 -0400559
560 r = syscall(SYS_clone, pidns | SIGCHLD, NULL);
561 if (r > 0) {
Ben Chan541c7e52011-08-26 14:55:53 -0700562 if (oldenv_copy) {
563 setenv(kLdPreloadEnvVar, oldenv_copy, 1);
564 free(oldenv_copy);
565 } else {
566 unsetenv(kLdPreloadEnvVar);
567 }
Will Drewry2f54b6a2011-09-16 13:45:31 -0500568 unsetenv(kFdEnvVar);
Elly Jonescd7a9042011-07-22 13:56:51 -0400569 j->initpid = r;
Will Drewry2f54b6a2011-09-16 13:45:31 -0500570 close(pipe_fds[0]);
571 r = send_commands_to_child(j, pipe_fds[1]);
572 close(pipe_fds[1]);
573 if (r) {
574 kill(j->initpid, SIGKILL);
575 die("failed to send marshalled minijail");
576 }
Elly Jonescd7a9042011-07-22 13:56:51 -0400577 return 0;
578 }
Ben Chan541c7e52011-08-26 14:55:53 -0700579
580 free(oldenv_copy);
581
Elly Jonescd7a9042011-07-22 13:56:51 -0400582 if (r < 0)
583 return r;
584
Will Drewry2f54b6a2011-09-16 13:45:31 -0500585 j->flags.uid = 0;
586 /* TODO(wad) gid should be sent over preload and not done in advance.
587 * j->flags.gid = 0;
588 */
589 j->flags.usergroups = 0;
590 j->flags.caps = 0;
591 j->flags.ptrace = 0;
592 j->flags.seccomp = 0;
593
Elly Jonescd7a9042011-07-22 13:56:51 -0400594 j->flags.pids = 0;
595
596 /* Jail this process and its descendants... */
597 minijail_enter(j);
598
599 if (pidns) {
600 /* pid namespace: this process will become init inside the new namespace, so
601 * fork off a child to actually run the program (we don't want all programs
602 * we might exec to have to know how to be init). */
603 r = fork();
604 if (r < 0)
605 _exit(r);
606 else if (r > 0)
607 init(r); /* never returns */
608 }
609
Will Drewry2f54b6a2011-09-16 13:45:31 -0500610
Elly Jonescd7a9042011-07-22 13:56:51 -0400611 /* If we aren't pid-namespaced:
612 * calling process
613 * -> execve()-ing process
614 * If we are:
615 * calling process
616 * -> init()-ing process
617 * -> execve()-ing process
618 */
619 _exit(execve(filename, argv, environ));
620}
621
622int minijail_kill(struct minijail *j) {
623 int st;
624 if (kill(j->initpid, SIGTERM))
625 return errno;
626 if (waitpid(j->initpid, &st, 0) < 0)
627 return errno;
628 return st;
629}
630
631int minijail_wait(struct minijail *j) {
632 int st;
633 if (waitpid(j->initpid, &st, 0) < 0)
634 return errno;
635 if (!WIFEXITED(st))
636 return MINIJAIL_ERR_JAIL;
637 return WEXITSTATUS(st);
638}
639
640void minijail_destroy(struct minijail *j) {
Will Drewry32ac9f52011-08-18 21:36:27 -0500641 struct seccomp_filter *f = j->filters;
642 /* Unlink the tail and head */
643 if (f)
644 f->prev->next = NULL;
645 while (f) {
646 struct seccomp_filter *next = f->next;
647 free(f->filter);
648 free(f);
649 f = next;
650 }
Will Drewry2ddaad02011-09-16 11:36:08 -0500651 if (j->user)
652 free(j->user);
Elly Jonescd7a9042011-07-22 13:56:51 -0400653 free(j);
654}