blob: 25aa930302db6ff7a2445fe5fd820e63575ba212 [file] [log] [blame]
/* Copyright 2018 The ChromiumOS Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#include <dlfcn.h>
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05007#include <err.h>
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05008#include <errno.h>
Zi Lina9e72262022-01-11 03:22:21 +00009#include <fcntl.h>
Mike Frysinger5ef22ca2018-01-20 13:42:10 -050010#include <getopt.h>
Luis Hector Chavezc3e17722018-10-16 20:43:12 -070011#include <inttypes.h>
Luis Hector Chavez8ddef8f2019-01-02 08:40:54 -080012#include <stdbool.h>
Mike Frysinger5ef22ca2018-01-20 13:42:10 -050013#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <sys/capability.h>
Mike Frysinger785b1c32018-02-23 15:47:24 -050017#include <sys/mount.h>
Zi Lina9e72262022-01-11 03:22:21 +000018#include <sys/stat.h>
Mike Frysinger5ef22ca2018-01-20 13:42:10 -050019#include <sys/types.h>
Zi Lina9e72262022-01-11 03:22:21 +000020#include <sys/vfs.h>
Mike Frysinger5ef22ca2018-01-20 13:42:10 -050021#include <unistd.h>
22
Luis Hector Chavezc3e17722018-10-16 20:43:12 -070023#include <linux/filter.h>
24
Mike Frysinger5ef22ca2018-01-20 13:42:10 -050025#include "libminijail.h"
26#include "libsyscalls.h"
27
Zi Lin44461c72021-11-16 18:37:27 +000028#include "config_parser.h"
Mike Frysinger5ef22ca2018-01-20 13:42:10 -050029#include "elfparse.h"
30#include "minijail0_cli.h"
31#include "system.h"
32#include "util.h"
33
/* Size, in bytes, of the buffer build_idmap() formats one id mapping into. */
#define IDMAP_LEN 32U
/* Value use_profile() stores in |*tmp_size| when none was set: 64 MiB. */
#define DEFAULT_TMP_SIZE (64 << 20)
36
/*
 * malloc() wrapper for the CLI: a failed allocation terminates the process
 * through err() with exit code 1 instead of returning NULL. This lives in the
 * CLI only; the library is expected to report ENOMEM to its callers.
 */
static void *xmalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(1, "malloc() failed");
	return mem;
}
48
/*
 * strdup() wrapper for the CLI: returns a heap copy of |s|, or terminates the
 * process through err() with exit code 1 when the copy cannot be allocated.
 */
static char *xstrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		err(1, "strdup() failed");
	return copy;
}
56
/*
 * Apply the user given in |arg| to the jail |j|.
 *
 * |arg| is either a decimal uid or a user name:
 *  - a decimal uid is stored in |*out_uid| and applied with
 *    minijail_change_uid(); |*out_gid| is not written in this case;
 *  - anything else is resolved with lookup_user(), which is handed both
 *    |out_uid| and |out_gid|, and applied with minijail_change_user().
 *
 * Any failure terminates the process with exit code 1.
 */
static void set_user(struct minijail *j, const char *arg, uid_t *out_uid,
		     gid_t *out_gid)
{
	char *end = NULL;

	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (!*end && *arg) {
		uid_t uid = (uid_t)parsed;
		/*
		 * Reject numbers that overflowed strtoul() or that do not
		 * survive the narrowing to uid_t. Without this check a value
		 * such as 4294967296 silently wraps to uid 0 wherever
		 * unsigned long is wider than uid_t.
		 */
		if (errno == ERANGE || (unsigned long)uid != parsed)
			errx(1, "Bad user '%s': uid is out of range", arg);
		*out_uid = uid;
		minijail_change_uid(j, uid);
		return;
	}

	int ret = lookup_user(arg, out_uid, out_gid);
	if (ret) {
		/* The helper reports a negated errno value; hand it to err(). */
		errno = -ret;
		err(1, "Bad user '%s'", arg);
	}

	ret = minijail_change_user(j, arg);
	if (ret) {
		errno = -ret;
		err(1, "minijail_change_user('%s') failed", arg);
	}
}
80
/*
 * Apply the group given in |arg| to the jail |j|.
 *
 * |arg| is either a decimal gid or a group name; names are resolved with
 * lookup_group(). The resulting gid is stored in |*out_gid| and applied with
 * minijail_change_gid(). Any failure terminates the process with exit code 1.
 */
static void set_group(struct minijail *j, const char *arg, gid_t *out_gid)
{
	char *end = NULL;

	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	if (!*end && *arg) {
		gid_t gid = (gid_t)parsed;
		/*
		 * Reject numbers that overflowed strtoul() or that do not
		 * survive the narrowing to gid_t, instead of letting them
		 * wrap around to an unrelated (possibly privileged) gid.
		 */
		if (errno == ERANGE || (unsigned long)gid != parsed)
			errx(1, "Bad group '%s': gid is out of range", arg);
		*out_gid = gid;
		minijail_change_gid(j, gid);
		return;
	}

	int ret = lookup_group(arg, out_gid);
	if (ret) {
		/* The helper reports a negated errno value; hand it to err(). */
		errno = -ret;
		err(1, "Bad group '%s'", arg);
	}

	minijail_change_gid(j, *out_gid);
}
99
/*
 * Helper function used by --add-suppl-group (possibly more than once),
 * to build the supplementary gids array.
 *
 * |arg| is either a decimal gid or a group name (resolved with
 * lookup_group()). The gid is appended to the heap array |*suppl_gids|, which
 * is grown with realloc(), and |*suppl_gids_count| is incremented. Any
 * failure terminates the process with exit code 1.
 */
static void suppl_group_add(size_t *suppl_gids_count, gid_t **suppl_gids,
			    char *arg)
{
	char *end = NULL;
	int ret;

	errno = 0;
	unsigned long parsed = strtoul(arg, &end, 10);
	gid_t gid = (gid_t)parsed;
	if (!*end && *arg) {
		/*
		 * A gid number has been specified. Make sure it neither
		 * overflowed strtoul() nor got truncated by the narrowing to
		 * gid_t before accepting it.
		 */
		if (errno == ERANGE || (unsigned long)gid != parsed)
			errx(1, "Bad group '%s': gid is out of range", arg);
	} else if ((ret = lookup_group(arg, &gid))) {
		/*
		 * A group name has been specified,
		 * but doesn't exist: we bail out.
		 */
		errno = -ret;
		err(1, "Bad group '%s'", arg);
	}

	/*
	 * From here, gid is guaranteed to be set and valid,
	 * we add it to our supplementary gids array.
	 *
	 * The result of realloc() is checked before it replaces the caller's
	 * pointer: the check must look at the new array itself, not at
	 * |suppl_gids|, which is never NULL.
	 */
	size_t new_count = *suppl_gids_count + 1;
	gid_t *grown = realloc(*suppl_gids, sizeof(gid_t) * new_count);
	if (!grown)
		err(1, "failed to allocate memory");

	grown[new_count - 1] = gid;
	*suppl_gids = grown;
	*suppl_gids_count = new_count;
}
132
/*
 * Parse |arg| as a hexadecimal bitmask and pass it to
 * minijail_skip_setting_securebits(). Trailing characters that are not part
 * of the number terminate the process with exit code 1.
 */
static void skip_securebits(struct minijail *j, const char *arg)
{
	char *rest = NULL;
	const uint64_t skip_mask = strtoull(arg, &rest, 16);

	if (*rest != '\0')
		errx(1, "Invalid securebit mask: '%s'", arg);
	minijail_skip_setting_securebits(j, skip_mask);
}
142
143static void use_caps(struct minijail *j, const char *arg)
144{
Luis Hector Chavezdabc4302018-09-21 09:21:47 -0700145 uint64_t caps = 0;
146 cap_t parsed_caps = cap_from_text(arg);
147
148 if (parsed_caps != NULL) {
149 unsigned int i;
150 const uint64_t one = 1;
151 cap_flag_value_t cap_value;
152 unsigned int last_valid_cap = get_last_valid_cap();
153
154 for (i = 0; i <= last_valid_cap; ++i) {
155 if (cap_get_flag(parsed_caps, i, CAP_EFFECTIVE,
156 &cap_value)) {
Luis Hector Chavez677900f2018-09-24 09:13:26 -0700157 if (errno == EINVAL) {
158 /*
159 * Some versions of libcap reject any
160 * capabilities they were not compiled
161 * with by returning EINVAL.
162 */
163 continue;
164 }
Zi Linfdc98d62022-01-19 22:10:29 +0000165 err(1,
166 "Could not get the value of the %d-th "
Zi Lina9e72262022-01-11 03:22:21 +0000167 "capability",
168 i);
Luis Hector Chavezdabc4302018-09-21 09:21:47 -0700169 }
170 if (cap_value == CAP_SET)
171 caps |= (one << i);
172 }
173 cap_free(parsed_caps);
174 } else {
175 char *end = NULL;
176 caps = strtoull(arg, &end, 16);
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500177 if (*end)
178 errx(1, "Invalid cap set: '%s'", arg);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500179 }
Luis Hector Chavezdabc4302018-09-21 09:21:47 -0700180
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500181 minijail_use_caps(j, caps);
182}
183
/*
 * Parse a bind specification of the form <src[,dst[,writable]]> from |arg|
 * and register it with minijail_bind().
 *
 * |arg| is split in place by tokenize(). <dst> defaults to <src> when it is
 * missing or empty. <writable> may be missing, empty or "0" (read-only), or
 * "1" (writable); any other value is rejected. An empty <src> or extra
 * trailing fields are rejected as well. Every rejection terminates the
 * process with exit code 1.
 */
static void add_binding(struct minijail *j, char *arg)
{
	char *src = tokenize(&arg, ",");
	char *dest = tokenize(&arg, ",");
	char *flags = tokenize(&arg, ",");
	if (!src || src[0] == '\0' || arg != NULL) {
		/*
		 * |src| and |dest| may be NULL here (the code above already
		 * guards against that); never hand a NULL pointer to "%s".
		 */
		errx(1, "Bad binding: %s %s", src ? src : "(null)",
		     dest ? dest : "(null)");
	}
	if (dest == NULL || dest[0] == '\0')
		dest = src;
	int writable;
	if (flags == NULL || flags[0] == '\0' || streq(flags, "0"))
		writable = 0;
	else if (streq(flags, "1"))
		writable = 1;
	else
		errx(1, "Bad value for <writable>: %s", flags);
	if (minijail_bind(j, src, dest, writable))
		errx(1, "minijail_bind failed");
}
203
/*
 * Parse a resource limit of the form <type,cur,max> from |arg| and register
 * it with minijail_rlimit().
 *
 * |arg| is split in place by tokenize(). All three fields are mandatory and
 * no further field may follow. <cur> and <max> are either the literal
 * "unlimited" (RLIM_INFINITY) or a number accepted by strtoul() with base 0.
 * <type> is resolved with parse_single_constant(). Every rejection
 * terminates the process with exit code 1.
 */
static void add_rlimit(struct minijail *j, char *arg)
{
	char *type = tokenize(&arg, ",");
	char *cur = tokenize(&arg, ",");
	char *max = tokenize(&arg, ",");
	char *end;
	if (!type || type[0] == '\0' || !cur || cur[0] == '\0' || !max ||
	    max[0] == '\0' || arg != NULL) {
		/*
		 * Report the fields that were actually parsed. After the
		 * tokenize() calls |arg| only holds unexpected trailing
		 * fields (or NULL), so it is useless as a description of the
		 * bad input and unsafe to print with "%s".
		 */
		errx(1, "Bad rlimit '%s,%s,%s'%s", type ? type : "",
		     cur ? cur : "", max ? max : "",
		     arg ? " (unexpected trailing fields)" : "");
	}
	rlim_t cur_rlim;
	rlim_t max_rlim;
	if (streq(cur, "unlimited")) {
		cur_rlim = RLIM_INFINITY;
	} else {
		end = NULL;
		cur_rlim = strtoul(cur, &end, 0);
		if (*end)
			errx(1, "Bad soft limit: '%s'", cur);
	}
	if (streq(max, "unlimited")) {
		max_rlim = RLIM_INFINITY;
	} else {
		end = NULL;
		max_rlim = strtoul(max, &end, 0);
		if (*end)
			errx(1, "Bad hard limit: '%s'", max);
	}

	end = NULL;
	int resource = parse_single_constant(type, &end);
	/* |end| still pointing at |type| means nothing was consumed. */
	if (type == end)
		errx(1, "Bad rlimit: '%s'", type);

	if (minijail_rlimit(j, resource, cur_rlim, max_rlim))
		errx(1, "minijail_rlimit '%s,%s,%s' failed", type, cur, max);
}
241
/*
 * Parse a mount specification of the form <src,dst,fstype[,flags[,data]]>
 * from |arg| and register it with minijail_mount_with_data().
 *
 * |arg| is split in place by tokenize(). <src>, <dst> and <fstype> are
 * mandatory. <flags> defaults to 0 when missing or empty and is otherwise
 * resolved with parse_constant(). <data> keeps any embedded commas. Every
 * rejection terminates the process with exit code 1.
 */
static void add_mount(struct minijail *j, char *arg)
{
	char *src = tokenize(&arg, ",");
	char *dest = tokenize(&arg, ",");
	char *type = tokenize(&arg, ",");
	char *flags = tokenize(&arg, ",");
	char *data = tokenize(&arg, ",");
	char *end;
	if (!src || src[0] == '\0' || !dest || dest[0] == '\0' || !type ||
	    type[0] == '\0') {
		/*
		 * Any of the three fields may be NULL here (the condition
		 * above guards against that); never hand NULL to "%s".
		 */
		errx(1, "Bad mount: %s %s %s", src ? src : "(null)",
		     dest ? dest : "(null)", type ? type : "(null)");
	}

	/*
	 * Fun edge case: the data option itself is comma delimited.  If there
	 * were no more options, then arg would be set to NULL.  But if we had
	 * more pending, it'll be pointing to the next token.  Back up and undo
	 * the null byte so it'll be merged back.
	 * An example:
	 *   none,/tmp,tmpfs,0xe,mode=0755,uid=10,gid=10
	 * The tokenize calls above will turn this memory into:
	 *   none\0/tmp\0tmpfs\00xe\0mode=0755\0uid=10,gid=10
	 * With data pointing at mode=0755 and arg pointing at uid=10,gid=10.
	 */
	if (arg != NULL)
		arg[-1] = ',';

	unsigned long mountflags;
	if (flags == NULL || flags[0] == '\0') {
		mountflags = 0;
	} else {
		end = NULL;
		mountflags = parse_constant(flags, &end);
		/* |end| still pointing at |flags| means nothing was parsed. */
		if (flags == end)
			errx(1, "Bad mount flags: %s", flags);
	}

	if (minijail_mount_with_data(j, src, dest, type, mountflags, data))
		errx(1, "minijail_mount failed");
}
282
283static char *build_idmap(id_t id, id_t lowerid)
284{
285 int ret;
Mike Frysinger1036cd82020-08-28 00:15:59 -0400286 char *idmap = xmalloc(IDMAP_LEN);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500287 ret = snprintf(idmap, IDMAP_LEN, "%d %d 1", id, lowerid);
288 if (ret < 0 || (size_t)ret >= IDMAP_LEN) {
289 free(idmap);
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500290 errx(1, "Could not build id map");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500291 }
292 return idmap;
293}
294
295static int has_cap_setgid(void)
296{
297 cap_t caps;
298 cap_flag_value_t cap_value;
299
300 if (!CAP_IS_SUPPORTED(CAP_SETGID))
301 return 0;
302
303 caps = cap_get_proc();
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500304 if (!caps)
305 err(1, "Could not get process' capabilities");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500306
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500307 if (cap_get_flag(caps, CAP_SETGID, CAP_EFFECTIVE, &cap_value))
308 err(1, "Could not get the value of CAP_SETGID");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500309
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500310 if (cap_free(caps))
311 err(1, "Could not free capabilities");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500312
313 return cap_value == CAP_SET;
314}
315
/*
 * Configure the uid and/or gid maps of the jail |j|.
 *
 * For each of |set_uidmap| / |set_gidmap| that is non-zero, user and pid
 * namespaces are requested and the corresponding map is installed. When the
 * map string (|uidmap| / |gidmap|) is NULL, a one-entry map from the current
 * uid/gid to |uid| / |gid| is built with build_idmap().
 *
 * The map string that was installed is passed to free() afterwards, including
 * one supplied by the caller. Failures terminate the process with exit code 1.
 */
static void set_ugid_mapping(struct minijail *j, int set_uidmap, uid_t uid,
			     char *uidmap, int set_gidmap, gid_t gid,
			     char *gidmap)
{
	if (set_uidmap) {
		minijail_namespace_user(j);
		minijail_namespace_pids(j);

		/*
		 * If no map is passed, map the current uid to the chosen uid
		 * in the target namespace (or root, if none was chosen).
		 */
		if (uidmap == NULL)
			uidmap = build_idmap(uid, getuid());

		if (minijail_uidmap(j, uidmap) != 0)
			errx(1, "Could not set uid map");
		free(uidmap);
	}

	if (set_gidmap) {
		minijail_namespace_user(j);
		minijail_namespace_pids(j);

		/*
		 * If no map is passed, map the current gid to the chosen gid
		 * in the target namespace.
		 */
		if (gidmap == NULL)
			gidmap = build_idmap(gid, getgid());

		/*
		 * Without CAP_SETGID we are not running as root, so we also
		 * have to disable setgroups(2) to be able to set the gid map.
		 * See
		 * http://man7.org/linux/man-pages/man7/user_namespaces.7.html
		 */
		if (!has_cap_setgid())
			minijail_namespace_user_disable_setgroups(j);

		if (minijail_gidmap(j, gidmap) != 0)
			errx(1, "Could not set gid map");
		free(gidmap);
	}
}
362
/*
 * Request a chroot to |path| for the jail |j| and record it by setting
 * |*chroot| to 1. Refused (exit code 1) when |pivot_root| is non-zero or when
 * minijail_enter_chroot() fails.
 */
static void use_chroot(struct minijail *j, const char *path, int *chroot,
		       int pivot_root)
{
	if (pivot_root != 0)
		errx(1, "Could not set chroot because -P was specified");

	if (minijail_enter_chroot(j, path) != 0)
		errx(1, "Could not set chroot");

	*chroot = 1;
}
372
/*
 * Request a pivot_root to |path| for the jail |j|, also request a mount
 * namespace, and record it by setting |*pivot_root| to 1. Refused (exit code
 * 1) when |chroot| is non-zero or when minijail_enter_pivot_root() fails.
 */
static void use_pivot_root(struct minijail *j, const char *path,
			   int *pivot_root, int chroot)
{
	if (chroot != 0)
		errx(1, "Could not set pivot_root because -C was specified");

	if (minijail_enter_pivot_root(j, path) != 0)
		errx(1, "Could not set pivot_root");

	minijail_namespace_vfs(j);
	*pivot_root = 1;
}
383
384static void use_profile(struct minijail *j, const char *profile,
385 int *pivot_root, int chroot, size_t *tmp_size)
386{
Mike Frysinger4d2a81e2018-01-22 16:43:33 -0500387 /* Note: New profiles should be added in minijail0_cli_unittest.cc. */
388
Mike Frysinger22dc3522022-07-07 19:24:13 -0400389 if (streq(profile, "minimalistic-mountns") ||
390 streq(profile, "minimalistic-mountns-nodev")) {
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500391 minijail_namespace_vfs(j);
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500392 if (minijail_bind(j, "/", "/", 0))
393 errx(1, "minijail_bind(/) failed");
394 if (minijail_bind(j, "/proc", "/proc", 0))
395 errx(1, "minijail_bind(/proc) failed");
Mike Frysinger22dc3522022-07-07 19:24:13 -0400396 if (streq(profile, "minimalistic-mountns")) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500397 if (minijail_bind(j, "/dev/log", "/dev/log", 0))
398 errx(1, "minijail_bind(/dev/log) failed");
Mike Frysingercc5917c2020-02-03 12:34:14 -0500399 minijail_mount_dev(j);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500400 }
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500401 if (!*tmp_size) {
402 /* Avoid clobbering |tmp_size| if it was already set. */
403 *tmp_size = DEFAULT_TMP_SIZE;
404 }
405 minijail_remount_proc_readonly(j);
Ben Scarlatof6102622022-09-05 19:31:42 +0000406 minijail_set_using_minimalistic_mountns(j);
Allen Webbee876072019-02-21 10:56:21 -0800407 use_pivot_root(j, DEFAULT_PIVOT_ROOT, pivot_root, chroot);
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500408 } else
409 errx(1, "Unrecognized profile name '%s'", profile);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500410}
411
/*
 * Translate the mount propagation mode named |mode| ("shared", "private",
 * "slave" or "unbindable") into the matching MS_* flag and pass it to
 * minijail_remount_mode(). Any other name terminates the process with exit
 * code 1.
 */
static void set_remount_mode(struct minijail *j, const char *mode)
{
	static const struct {
		const char *name;
		unsigned long flag;
	} known_modes[] = {
		{"shared", MS_SHARED},
		{"private", MS_PRIVATE},
		{"slave", MS_SLAVE},
		{"unbindable", MS_UNBINDABLE},
	};

	for (size_t i = 0; i < sizeof(known_modes) / sizeof(known_modes[0]);
	     ++i) {
		if (streq(mode, known_modes[i].name)) {
			minijail_remount_mode(j, known_modes[i].flag);
			return;
		}
	}

	errx(1, "Unknown remount mode: '%s'", mode);
}
427
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700428static void read_seccomp_filter(const char *filter_path,
429 struct sock_fprog *filter)
430{
Mike Frysingerdebdf5d2021-06-21 09:52:06 -0400431 attribute_cleanup_fp FILE *f = fopen(filter_path, "re");
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500432 if (!f)
433 err(1, "failed to open %s", filter_path);
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700434 off_t filter_size = 0;
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500435 if (fseeko(f, 0, SEEK_END) == -1 || (filter_size = ftello(f)) == -1)
436 err(1, "failed to get file size of %s", filter_path);
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700437 if (filter_size % sizeof(struct sock_filter) != 0) {
Zi Lina9e72262022-01-11 03:22:21 +0000438 errx(1,
439 "filter size (%" PRId64 ") of %s is not a multiple of"
440 " %zu",
441 filter_size, filter_path, sizeof(struct sock_filter));
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700442 }
443 rewind(f);
444
445 filter->len = filter_size / sizeof(struct sock_filter);
Mike Frysinger1036cd82020-08-28 00:15:59 -0400446 filter->filter = xmalloc(filter_size);
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700447 if (fread(filter->filter, sizeof(struct sock_filter), filter->len, f) !=
448 filter->len) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500449 err(1, "failed read %s", filter_path);
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700450 }
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700451}
452
/*
 * Long options use values starting at 0x100 so that they're out of range of
 * bytes which is how command line options are processed.  Practically speaking,
 * we could get by with the (7-bit) ASCII range, but UTF-8 codepoints would be a
 * bit confusing, and honestly there's no reason to "optimize" here.
 *
 * The long enum values are internal to this file and can freely change at any
 * time without breaking anything.  Please keep alphabetically ordered.
 */
enum {
	/* Everything after this point only has long options. */
	LONG_OPTION_BASE = 0x100,
	OPT_ADD_SUPPL_GROUP,
	OPT_ALLOW_SPECULATIVE_EXECUTION,
	OPT_AMBIENT,
	OPT_CONFIG,
	OPT_ENV_ADD,
	OPT_ENV_RESET,
	OPT_FS_DEFAULT_PATHS,
	OPT_FS_PATH_ADVANCED_RW,
	OPT_FS_PATH_RO,
	OPT_FS_PATH_RW,
	OPT_FS_PATH_RX,
	OPT_LOGGING,
	OPT_PRELOAD_LIBRARY,
	OPT_PROFILE,
	OPT_SECCOMP_BPF_BINARY,
	OPT_UTS,
};
482
/*
 * Short-option specification handed to getopt_long().
 *
 * NB: When adding new options, prefer long-option only.  Add a short option
 * only if its meaning is intuitive/obvious at a glance.
 *
 * Keep this sorted: lowercase letters first, then uppercase letters.
 */
static const char optstring[] =
    "+"
    "a:b:c:de::f:g:hik:lm::nprst::u:vwyz"
    "B:C:GHIK::LM::NP:R:S:T:UV:Y";
491
492static const struct option long_options[] = {
493 {"help", no_argument, 0, 'h'},
494 {"mount-dev", no_argument, 0, 'd'},
495 {"ambient", no_argument, 0, OPT_AMBIENT},
496 {"uts", optional_argument, 0, OPT_UTS},
497 {"logging", required_argument, 0, OPT_LOGGING},
498 {"profile", required_argument, 0, OPT_PROFILE},
499 {"preload-library", required_argument, 0, OPT_PRELOAD_LIBRARY},
500 {"seccomp-bpf-binary", required_argument, 0, OPT_SECCOMP_BPF_BINARY},
501 {"add-suppl-group", required_argument, 0, OPT_ADD_SUPPL_GROUP},
502 {"allow-speculative-execution", no_argument, 0,
503 OPT_ALLOW_SPECULATIVE_EXECUTION},
504 {"config", required_argument, 0, OPT_CONFIG},
Stéphane Lesimplef65da3a2022-01-11 11:44:47 +0100505 {"env-add", required_argument, 0, OPT_ENV_ADD},
506 {"env-reset", no_argument, 0, OPT_ENV_RESET},
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500507 {"mount", required_argument, 0, 'k'},
508 {"bind-mount", required_argument, 0, 'b'},
Zi Linc6289102022-01-28 23:38:32 +0000509 {"ns-mount", no_argument, 0, 'v'},
Ben Scarlato485c8a02022-08-11 06:02:02 +0000510 {"fs-default-paths", no_argument, 0, OPT_FS_DEFAULT_PATHS},
Ben Scarlatoee82b492022-08-09 18:33:25 +0000511 {"fs-path-rx", required_argument, 0, OPT_FS_PATH_RX},
512 {"fs-path-ro", required_argument, 0, OPT_FS_PATH_RO},
513 {"fs-path-rw", required_argument, 0, OPT_FS_PATH_RW},
514 {"fs-path-advanced-rw", required_argument, 0, OPT_FS_PATH_ADVANCED_RW},
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500515 {0, 0, 0, 0},
516};
517
518/*
519 * Pull the usage string out into the top-level to help with long-lines. We
520 * want the output to be wrapped at 80 cols when it's shown to the user in the
521 * terminal, but we don't want the source wrapped to 80 cols because that will
522 * effectively make terminal output wrap to much lower levels (like <70).
523 */
524/* clang-format off */
525static const char help_text[] =
526"Account (user/group) options:\n"
527" -u <user> Change uid to <user>.\n"
528" -g <group> Change gid to <group>.\n"
529" -G Inherit supplementary groups from new uid.\n"
530" Incompatible with -y or --add-suppl-group.\n"
531" -y Keep original uid's supplementary groups.\n"
532" Incompatible with -G or --add-suppl-group.\n"
533" --add-suppl-group <group>\n"
534" Add <group> to the proccess' supplementary groups.\n"
535" Can be specified multiple times to add several groups.\n"
536" Incompatible with -y or -G.\n"
537"\n"
538"Mount/path options:\n"
539" -b <src[,dst[,writable]]>, --bind-mount <...>\n"
540" Bind <src> to <dst>.\n"
541" -k <src,dst,fstype[,flags[,data]]>, --mount <...>\n"
542" Mount <src> at <dst>. <flags> and <data> can be specified as\n"
543" in mount(2). Multiple instances allowed.\n"
544" -K Do not change share mode of any existing mounts.\n"
545" -K<mode> Mark all existing mounts as <mode> instead of MS_PRIVATE.\n"
546" -r Remount /proc read-only (implies -v).\n"
547" -d, --mount-dev\n"
548" Create a new /dev with a minimal set of device nodes\n"
549" (implies -v). See minijail0(1) for exact list.\n"
550" -t[size] Mount tmpfs at /tmp (implies -v).\n"
551" Optional argument specifies size (default \"64M\").\n"
552" -C <dir> chroot(2) to <dir>. Incompatible with -P.\n"
553" -P <dir> pivot_root(2) to <dir> (implies -v). Incompatible with -C.\n"
554"\n"
555"Namespace options:\n"
556" -N Enter a new cgroup namespace.\n"
557" -l Enter new IPC namespace.\n"
Zi Linc6289102022-01-28 23:38:32 +0000558" -v, --ns-mount\n"
559" Enter new mount namespace.\n"
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500560" -V <file> Enter specified mount namespace.\n"
561" -e[file] Enter new network namespace, or existing |file| if provided.\n"
562" -p Enter new pid namespace (implies -vr).\n"
563" -I Run as init (pid 1) inside a new pid namespace (implies -p).\n"
564" -U Enter new user namespace (implies -p).\n"
565" -m[<uid> <loweruid> <count>]\n"
566" Set the uid map of a user namespace (implies -pU).\n"
567" Same arguments as newuidmap(1); mappings are comma separated.\n"
568" With no mapping, map the current uid to root.\n"
569" Incompatible with -b without the 'writable' option.\n"
570" -M[<gid> <lowergid> <count>]\n"
571" Set the gid map of a user namespace (implies -pU).\n"
572" Same arguments as newgidmap(1); mappings are comma separated.\n"
573" With no mapping, map the current gid to root.\n"
574" Incompatible with -b without the 'writable' option.\n"
575" --uts[=name] Enter a new UTS namespace (and set hostname).\n"
576"\n"
577"Seccomp options:\n"
578" -S <file> Set seccomp filter using <file>.\n"
579" E.g., '-S /usr/share/filters/<prog>.$(uname -m)'.\n"
580" Requires -n when not running as root.\n"
581" --seccomp-bpf-binary=<f>\n"
582" Set a pre-compiled seccomp filter using <f>.\n"
583" E.g., '-S /usr/share/filters/<prog>.$(uname -m).bpf'.\n"
584" Requires -n when not running as root.\n"
585" The user is responsible for ensuring that the binary\n"
586" was compiled for the correct architecture / kernel version.\n"
587" -L Report blocked syscalls when using seccomp filter.\n"
588" If the kernel does not support SECCOMP_RET_LOG, some syscalls\n"
589" will automatically be allowed (see below).\n"
590" -Y Synchronize seccomp filters across thread group.\n"
591" -a <table> Use alternate syscall table <table>.\n"
592" -s Use seccomp mode 1 (not the same as -S).\n"
593"\n"
594"Other options:\n"
595" --config <file>\n"
596" Load the Minijail configuration file <file>.\n"
597" If used, must be specified ahead of other options.\n"
598" --profile <p>\n"
599" Configure minijail0 to run with the <p> sandboxing profile,\n"
600" which is a convenient way to express multiple flags\n"
601" that are typically used together.\n"
602" See the minijail0(1) man page for the full list.\n"
603" -n Set no_new_privs. See prctl(2) for details.\n"
604" -c <caps> Restrict caps to <caps>.\n"
605" --ambient Raise ambient capabilities. Requires -c.\n"
606" -B <mask> Skip setting <mask> securebits when restricting caps (-c).\n"
607" By default, SECURE_NOROOT, SECURE_NO_SETUID_FIXUP, and \n"
608" SECURE_KEEP_CAPS (with their respective locks) are set.\n"
609" -f <file> Write the pid of the jailed process to <file>.\n"
610" -i Exit immediately after fork(2); i.e. background the program.\n"
611" -z Don't forward signals to jailed process.\n"
612" -R <type,cur,max>\n"
613" Call setrlimit(3); can be specified multiple times.\n"
614" -T <type> Assume <program> is a <type> ELF binary;\n"
615" <type> may be 'static' or 'dynamic'.\n"
616" This will avoid accessing <program> binary before execve(2).\n"
617" Type 'static' will avoid preload hooking.\n"
618" -w Create and join a new anonymous session keyring.\n"
Stéphane Lesimplef65da3a2022-01-11 11:44:47 +0100619" --env-reset Clear the current environment instead of having <program>\n"
620" inherit the active environment. Often used to start <program>\n"
621" with a minimal sanitized environment.\n"
622" --env-add <NAME=value>\n"
623" Sets the specified environment variable <NAME>\n"
624" in the <program>'s environment before starting it.\n"
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500625"\n"
626"Uncommon options:\n"
627" --allow-speculative-execution\n"
628" Allow speculative execution by disabling mitigations.\n"
Ben Scarlato485c8a02022-08-11 06:02:02 +0000629" --fs-default-paths\n"
630" Adds a set of allowed paths to allow running common system \n"
631" executables.\n"
Ben Scarlatoee82b492022-08-09 18:33:25 +0000632" --fs-path-rx\n"
633" Adds an allowed read-execute path.\n"
634" --fs-path-ro\n"
635" Adds an allowed read-only path.\n"
636" --fs-path-rw\n"
637" Adds an allowed read-write path.\n"
638" --fs-path-advanced-rw\n"
639" Adds an allowed advanced read-write path.\n"
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500640" --preload-library=<file>\n"
641" Overrides the path to \"" PRELOADPATH "\".\n"
642" This is only really useful for local testing.\n"
643" --logging=<output>\n"
644" Set the logging system output: 'auto' (default),\n"
645" 'syslog', or 'stderr'.\n"
646" -h Help (this message).\n"
647" -H Seccomp filter help message.\n";
648/* clang-format on */
649
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500650static void usage(const char *progn)
651{
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500652 printf("Usage: %s [options] [--] <program> [args...]\n\n%s", progn,
653 help_text);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500654
Mike Frysingere16ab3e2022-01-25 00:28:54 -0500655 printf("\nsyscalls allowed when logging (-L):\n ");
656 for (size_t i = 0; i < log_syscalls_len; ++i)
657 printf(" %s", log_syscalls[i]);
658 printf("\n");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500659}
660
661static void seccomp_filter_usage(const char *progn)
662{
663 const struct syscall_entry *entry = syscall_table;
664 printf("Usage: %s -S <policy.file> <program> [args...]\n\n"
665 "System call names supported:\n",
666 progn);
667 for (; entry->name && entry->nr >= 0; ++entry)
668 printf(" %s [%d]\n", entry->name, entry->nr);
669 printf("\nSee minijail0(5) for example policies.\n");
670}
671
Zi Lin44461c72021-11-16 18:37:27 +0000672/*
673 * Return the next unconsumed option char/value parsed from
674 * |*conf_entry_list|. |optarg| is updated to point to an argument from
675 * the entry value. If all options have been consumed, |*conf_entry_list|
676 * will be freed and -1 will be returned.
677 */
678static int getopt_from_conf(const struct option *longopts,
679 struct config_entry_list **conf_entry_list,
680 size_t *conf_index)
681{
682 int opt = -1;
683 /* If we've consumed all the options in the this config, reset it. */
684 if (*conf_index >= (*conf_entry_list)->num_entries) {
685 free_config_entry_list(*conf_entry_list);
686 *conf_entry_list = NULL;
687 *conf_index = 0;
688 return opt;
689 }
690
691 struct config_entry *entry = &(*conf_entry_list)->entries[*conf_index];
692 /* Look up a matching long option. */
693 size_t i = 0;
694 const struct option *curr_opt;
695 for (curr_opt = &longopts[0]; curr_opt->name != NULL;
696 curr_opt = &longopts[++i])
Mike Frysinger22dc3522022-07-07 19:24:13 -0400697 if (streq(entry->key, curr_opt->name))
Zi Lin44461c72021-11-16 18:37:27 +0000698 break;
699 if (curr_opt->name == NULL) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500700 errx(1,
701 "Unable to recognize '%s' as Minijail conf entry key, "
Mike Frysingerdac801f2022-01-14 13:31:11 -0500702 "please refer to minijail0(5) for syntax and examples.",
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500703 entry->key);
Zi Lin44461c72021-11-16 18:37:27 +0000704 }
705 opt = curr_opt->val;
706 optarg = (char *)entry->value;
707 (*conf_index)++;
708 return opt;
709}
710
711/*
712 * Similar to getopt(3), return the next option char/value as it
713 * parses through the CLI argument list. Config entries in
714 * |*conf_entry_list| will be parsed with precendences over cli options.
715 * Same as getopt(3), |optarg| is pointing to the option argument.
716 */
717static int getopt_conf_or_cli(int argc, char *const argv[],
718 struct config_entry_list **conf_entry_list,
719 size_t *conf_index)
720{
721 int opt = -1;
Zi Lin44461c72021-11-16 18:37:27 +0000722 if (*conf_entry_list != NULL)
723 opt =
724 getopt_from_conf(long_options, conf_entry_list, conf_index);
725 if (opt == -1)
726 opt = getopt_long(argc, argv, optstring, long_options, NULL);
727 return opt;
728}
729
/*
 * Apply one VAR=value assignment, given in |arg|, to the child environment
 * |*envp|. |arg| is split in place at its first '='. When |*envp| is still
 * NULL it is first initialized as a copy of |environ|. Any failure is fatal.
 */
static void set_child_env(char ***envp, char *arg, char *const environ[])
{
	char *sep = strchr(arg, '=');

	/* We expect VAR=value format for arg. */
	if (sep == NULL) {
		errx(1, "Expected an argument of the "
			"form VAR=value (got '%s')", arg);
	}

	/* Terminate the name; the value starts right after the '='. */
	*sep = '\0';

	if (*envp == NULL) {
		/*
		 * First assignment seen: start the future child env from a
		 * copy of our current env.
		 */
		*envp = minijail_copy_env(environ);
		if (*envp == NULL)
			err(1, "Failed to allocate memory.");
	}

	if (minijail_setenv(envp, arg, sep + 1, 1) != 0)
		err(1, "minijail_setenv() failed.");
}
752
Luis Hector Chavez9acba452018-10-11 10:13:25 -0700753int parse_args(struct minijail *j, int argc, char *const argv[],
Stéphane Lesimplef65da3a2022-01-11 11:44:47 +0100754 char *const environ[], int *exit_immediately,
755 ElfType *elftype, const char **preload_path,
756 char ***envp)
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500757{
Zi Lin44461c72021-11-16 18:37:27 +0000758 enum seccomp_type { None, Strict, Filter, BpfBinaryFilter };
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000759 enum seccomp_type seccomp = None;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500760 int opt;
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000761 int use_seccomp_filter = 0;
762 int use_seccomp_filter_binary = 0;
763 int use_seccomp_log = 0;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500764 int forward = 1;
765 int binding = 0;
766 int chroot = 0, pivot_root = 0;
Nicole Anderson-Auafa54be2021-03-09 23:00:49 +0000767 int mount_ns = 0, change_remount = 0;
Jorge Lucangeli Obes9e1ac372020-01-23 14:36:50 -0500768 const char *remount_mode = NULL;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500769 int inherit_suppl_gids = 0, keep_suppl_gids = 0;
770 int caps = 0, ambient_caps = 0;
Luis Hector Chavez8ddef8f2019-01-02 08:40:54 -0800771 bool use_uid = false, use_gid = false;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500772 uid_t uid = 0;
773 gid_t gid = 0;
Stéphane Lesimple8d7174b2020-02-07 20:51:08 +0100774 gid_t *suppl_gids = NULL;
775 size_t suppl_gids_count = 0;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500776 char *uidmap = NULL, *gidmap = NULL;
777 int set_uidmap = 0, set_gidmap = 0;
778 size_t tmp_size = 0;
779 const char *filter_path = NULL;
Mike Frysinger3e6a12c2019-09-24 12:50:55 -0400780 int log_to_stderr = -1;
Zi Lin44461c72021-11-16 18:37:27 +0000781 struct config_entry_list *conf_entry_list = NULL;
782 size_t conf_index = 0;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500783
Zi Lin44461c72021-11-16 18:37:27 +0000784 while ((opt = getopt_conf_or_cli(argc, argv, &conf_entry_list,
785 &conf_index)) != -1) {
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500786 switch (opt) {
787 case 'u':
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500788 if (use_uid)
789 errx(1, "-u provided multiple times.");
Luis Hector Chavez8ddef8f2019-01-02 08:40:54 -0800790 use_uid = true;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500791 set_user(j, optarg, &uid, &gid);
792 break;
793 case 'g':
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500794 if (use_gid)
795 errx(1, "-g provided multiple times.");
Luis Hector Chavez8ddef8f2019-01-02 08:40:54 -0800796 use_gid = true;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500797 set_group(j, optarg, &gid);
798 break;
799 case 'n':
800 minijail_no_new_privs(j);
801 break;
802 case 's':
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000803 if (seccomp != None && seccomp != Strict) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500804 errx(1, "Do not use -s, -S, or "
805 "--seccomp-bpf-binary together");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500806 }
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000807 seccomp = Strict;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500808 minijail_use_seccomp(j);
809 break;
810 case 'S':
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000811 if (seccomp != None && seccomp != Filter) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500812 errx(1, "Do not use -s, -S, or "
813 "--seccomp-bpf-binary together");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500814 }
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000815 seccomp = Filter;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500816 minijail_use_seccomp_filter(j);
Luis Hector Chavezc3e17722018-10-16 20:43:12 -0700817 filter_path = optarg;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500818 use_seccomp_filter = 1;
819 break;
820 case 'l':
821 minijail_namespace_ipc(j);
822 break;
823 case 'L':
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000824 if (seccomp == BpfBinaryFilter) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500825 errx(1, "-L does not work with "
826 "--seccomp-bpf-binary");
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +0000827 }
828 use_seccomp_log = 1;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500829 minijail_log_seccomp_filter_failures(j);
830 break;
831 case 'b':
832 add_binding(j, optarg);
833 binding = 1;
834 break;
835 case 'B':
836 skip_securebits(j, optarg);
837 break;
838 case 'c':
839 caps = 1;
840 use_caps(j, optarg);
841 break;
842 case 'C':
843 use_chroot(j, optarg, &chroot, pivot_root);
844 break;
845 case 'k':
846 add_mount(j, optarg);
847 break;
848 case 'K':
Jorge Lucangeli Obes9e1ac372020-01-23 14:36:50 -0500849 remount_mode = optarg;
Jorge Lucangeli Obes93418062019-09-27 10:59:45 -0400850 change_remount = 1;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500851 break;
852 case 'P':
853 use_pivot_root(j, optarg, &pivot_root, chroot);
854 break;
855 case 'f':
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500856 if (0 != minijail_write_pid_file(j, optarg))
857 errx(1, "Could not prepare pid file path");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500858 break;
859 case 't':
860 minijail_namespace_vfs(j);
861 if (!tmp_size) {
862 /*
863 * Avoid clobbering |tmp_size| if it was already
864 * set.
865 */
866 tmp_size = DEFAULT_TMP_SIZE;
867 }
868 if (optarg != NULL &&
869 0 != parse_size(&tmp_size, optarg)) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500870 errx(1, "Invalid /tmp tmpfs size");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500871 }
872 break;
873 case 'v':
874 minijail_namespace_vfs(j);
Jorge Lucangeli Obes9e1ac372020-01-23 14:36:50 -0500875 /*
876 * Set the default mount propagation in the command-line
877 * tool to MS_SLAVE.
878 *
879 * When executing the sandboxed program in a new mount
880 * namespace the Minijail library will by default
881 * remount all mounts with the MS_PRIVATE flag. While
882 * this is an appropriate, safe default for the library,
883 * MS_PRIVATE can be problematic: unmount events will
884 * not propagate into mountpoints marked as MS_PRIVATE.
885 * This means that if a mount is unmounted in the root
886 * mount namespace, it will not be unmounted in the
887 * non-root mount namespace.
888 * This in turn can be problematic because activity in
889 * the non-root mount namespace can now directly
890 * influence the root mount namespace (e.g. preventing
891 * re-mounts of said mount), which would be a privilege
892 * inversion.
893 *
894 * Setting the default in the command-line to MS_SLAVE
895 * will still prevent mounts from leaking out of the
896 * non-root mount namespace but avoid these
897 * privilege-inversion issues.
898 * For cases where mounts should not flow *into* the
899 * namespace either, the user can pass -Kprivate.
900 * Note that mounts are marked as MS_PRIVATE by default
901 * by the kernel, so unless the init process (like
902 * systemd) or something else marks them as shared, this
903 * won't do anything.
904 */
905 minijail_remount_mode(j, MS_SLAVE);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500906 mount_ns = 1;
907 break;
908 case 'V':
909 minijail_namespace_enter_vfs(j, optarg);
910 break;
911 case 'r':
912 minijail_remount_proc_readonly(j);
913 break;
914 case 'G':
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500915 if (keep_suppl_gids)
916 errx(1, "-y and -G are not compatible");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500917 minijail_inherit_usergroups(j);
918 inherit_suppl_gids = 1;
919 break;
920 case 'y':
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500921 if (inherit_suppl_gids)
922 errx(1, "-y and -G are not compatible");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500923 minijail_keep_supplementary_gids(j);
924 keep_suppl_gids = 1;
925 break;
926 case 'N':
927 minijail_namespace_cgroups(j);
928 break;
929 case 'p':
930 minijail_namespace_pids(j);
931 break;
932 case 'e':
933 if (optarg)
934 minijail_namespace_enter_net(j, optarg);
935 else
936 minijail_namespace_net(j);
937 break;
938 case 'i':
939 *exit_immediately = 1;
940 break;
941 case 'H':
942 seccomp_filter_usage(argv[0]);
943 exit(0);
944 case 'I':
945 minijail_namespace_pids(j);
946 minijail_run_as_init(j);
947 break;
948 case 'U':
949 minijail_namespace_user(j);
950 minijail_namespace_pids(j);
951 break;
952 case 'm':
953 set_uidmap = 1;
954 if (uidmap) {
955 free(uidmap);
956 uidmap = NULL;
957 }
958 if (optarg)
Mike Frysinger1036cd82020-08-28 00:15:59 -0400959 uidmap = xstrdup(optarg);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500960 break;
961 case 'M':
962 set_gidmap = 1;
963 if (gidmap) {
964 free(gidmap);
965 gidmap = NULL;
966 }
967 if (optarg)
Mike Frysinger1036cd82020-08-28 00:15:59 -0400968 gidmap = xstrdup(optarg);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500969 break;
970 case 'a':
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500971 if (0 != minijail_use_alt_syscall(j, optarg))
972 errx(1, "Could not set alt-syscall table");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500973 break;
974 case 'R':
975 add_rlimit(j, optarg);
976 break;
977 case 'T':
Mike Frysinger22dc3522022-07-07 19:24:13 -0400978 if (streq(optarg, "static"))
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500979 *elftype = ELFSTATIC;
Mike Frysinger22dc3522022-07-07 19:24:13 -0400980 else if (streq(optarg, "dynamic"))
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500981 *elftype = ELFDYNAMIC;
982 else {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -0500983 errx(1, "ELF type must be 'static' or "
984 "'dynamic'");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -0500985 }
986 break;
987 case 'w':
988 minijail_new_session_keyring(j);
989 break;
990 case 'Y':
991 minijail_set_seccomp_filter_tsync(j);
992 break;
993 case 'z':
994 forward = 0;
995 break;
996 case 'd':
997 minijail_namespace_vfs(j);
998 minijail_mount_dev(j);
999 break;
1000 /* Long options. */
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001001 case OPT_AMBIENT:
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001002 ambient_caps = 1;
1003 minijail_set_ambient_caps(j);
1004 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001005 case OPT_UTS:
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001006 minijail_namespace_uts(j);
1007 if (optarg)
1008 minijail_namespace_set_hostname(j, optarg);
1009 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001010 case OPT_LOGGING:
Mike Frysinger22dc3522022-07-07 19:24:13 -04001011 if (streq(optarg, "auto"))
Mike Frysinger3e6a12c2019-09-24 12:50:55 -04001012 log_to_stderr = -1;
Mike Frysinger22dc3522022-07-07 19:24:13 -04001013 else if (streq(optarg, "syslog"))
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001014 log_to_stderr = 0;
Mike Frysinger22dc3522022-07-07 19:24:13 -04001015 else if (streq(optarg, "stderr"))
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001016 log_to_stderr = 1;
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001017 else
Zi Lina9e72262022-01-11 03:22:21 +00001018 errx(1,
1019 "--logger must be 'syslog' or 'stderr'");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001020 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001021 case OPT_PROFILE:
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001022 use_profile(j, optarg, &pivot_root, chroot, &tmp_size);
1023 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001024 case OPT_PRELOAD_LIBRARY:
Luis Hector Chavez9acba452018-10-11 10:13:25 -07001025 *preload_path = optarg;
1026 break;
Ben Scarlato485c8a02022-08-11 06:02:02 +00001027 case OPT_FS_DEFAULT_PATHS:
Ben Scarlatof6102622022-09-05 19:31:42 +00001028 minijail_enable_default_fs_restrictions(j);
Ben Scarlato485c8a02022-08-11 06:02:02 +00001029 break;
Ben Scarlatoee82b492022-08-09 18:33:25 +00001030 case OPT_FS_PATH_RX:
1031 minijail_add_fs_restriction_rx(j, optarg);
1032 break;
1033 case OPT_FS_PATH_RO:
1034 minijail_add_fs_restriction_ro(j, optarg);
1035 break;
1036 case OPT_FS_PATH_RW:
1037 minijail_add_fs_restriction_rw(j, optarg);
1038 break;
1039 case OPT_FS_PATH_ADVANCED_RW:
1040 minijail_add_fs_restriction_advanced_rw(j, optarg);
1041 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001042 case OPT_SECCOMP_BPF_BINARY:
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +00001043 if (seccomp != None && seccomp != BpfBinaryFilter) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001044 errx(1, "Do not use -s, -S, or "
1045 "--seccomp-bpf-binary together");
Luis Hector Chavezc3e17722018-10-16 20:43:12 -07001046 }
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001047 if (use_seccomp_log == 1)
Zi Linfdc98d62022-01-19 22:10:29 +00001048 errx(1, "-L does not work with "
1049 "--seccomp-bpf-binary");
Nicole Anderson-Auc1118f62021-09-14 22:20:23 +00001050 seccomp = BpfBinaryFilter;
Luis Hector Chavezc3e17722018-10-16 20:43:12 -07001051 minijail_use_seccomp_filter(j);
1052 filter_path = optarg;
1053 use_seccomp_filter_binary = 1;
1054 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001055 case OPT_ADD_SUPPL_GROUP:
Zi Linfdc98d62022-01-19 22:10:29 +00001056 suppl_group_add(&suppl_gids_count, &suppl_gids, optarg);
Stéphane Lesimple8d7174b2020-02-07 20:51:08 +01001057 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001058 case OPT_ALLOW_SPECULATIVE_EXECUTION:
Anand K Mistry31adc6c2020-11-26 11:39:46 +11001059 minijail_set_seccomp_filter_allow_speculation(j);
1060 break;
Mike Frysinger4ffd7e52022-01-24 14:07:02 -05001061 case OPT_CONFIG: {
Zi Lin44461c72021-11-16 18:37:27 +00001062 if (conf_entry_list != NULL) {
Zi Linfdc98d62022-01-19 22:10:29 +00001063 errx(1, "Nested config file specification is "
1064 "not allowed.");
Zi Lin44461c72021-11-16 18:37:27 +00001065 }
1066 conf_entry_list = new_config_entry_list();
1067 conf_index = 0;
Zi Lina9e72262022-01-11 03:22:21 +00001068#if defined(BLOCK_NOEXEC_CONF)
1069 /*
Zi Linfdc98d62022-01-19 22:10:29 +00001070 * Check the conf file is in a exec mount.
1071 * With a W^X invariant, it excludes writable
1072 * mounts.
1073 */
Zi Lina9e72262022-01-11 03:22:21 +00001074 struct statfs conf_statfs;
1075 if (statfs(optarg, &conf_statfs) != 0)
1076 err(1, "statfs(%s) failed.", optarg);
1077 if ((conf_statfs.f_flags & MS_NOEXEC) != 0)
1078 errx(1,
1079 "Conf file must be in a exec "
1080 "mount: %s",
1081 optarg);
1082#endif
1083#if defined(ENFORCE_ROOTFS_CONF)
Zi Linfdc98d62022-01-19 22:10:29 +00001084 /* Make sure the conf file is in the same device as the
1085 * rootfs. */
Zi Lina9e72262022-01-11 03:22:21 +00001086 struct stat root_stat;
1087 struct stat conf_stat;
1088 if (stat("/", &root_stat) != 0)
1089 err(1, "stat(/) failed.");
1090 if (stat(optarg, &conf_stat) != 0)
1091 err(1, "stat(%s) failed.", optarg);
1092 if (root_stat.st_dev != conf_stat.st_dev)
1093 errx(1, "Conf file must be in the rootfs.");
1094#endif
Zi Lin44461c72021-11-16 18:37:27 +00001095 attribute_cleanup_fp FILE *config_file =
1096 fopen(optarg, "re");
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001097 if (!config_file)
Zi Lina9e72262022-01-11 03:22:21 +00001098 err(1, "Failed to open %s", optarg);
Zi Lin44461c72021-11-16 18:37:27 +00001099 if (!parse_config_file(config_file, conf_entry_list)) {
Zi Linfdc98d62022-01-19 22:10:29 +00001100 errx(
1101 1,
1102 "Unable to parse %s as Minijail conf file, "
1103 "please refer to minijail0(5) for syntax "
1104 "and examples.",
1105 optarg);
Zi Lin44461c72021-11-16 18:37:27 +00001106 }
1107 break;
1108 }
Stéphane Lesimplef65da3a2022-01-11 11:44:47 +01001109 case OPT_ENV_ADD:
1110 /*
1111 * We either copy our current env to the child env
1112 * then add the requested envvar to it, or just
1113 * add the requested envvar to the already existing
1114 * envp.
1115 */
1116 set_child_env(envp, optarg, environ);
1117 break;
1118 case OPT_ENV_RESET:
1119 if (*envp && *envp != environ) {
1120 /*
1121 * We already started to initialize the future
1122 * child env, because we got some --env-add
1123 * earlier on the command-line, so first,
1124 * free the memory we allocated.
1125 * If |*envp| happens to point to |environ|,
1126 * don't attempt to free it.
1127 */
1128 minijail_free_env(*envp);
1129 }
1130 /* Allocate an empty environment for the child. */
1131 *envp = calloc(1, sizeof(char *));
1132 if (!*envp)
1133 err(1, "Failed to allocate memory.");
1134 break;
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001135 default:
1136 usage(argv[0]);
1137 exit(opt == 'h' ? 0 : 1);
1138 }
1139 }
1140
Mike Frysinger3e6a12c2019-09-24 12:50:55 -04001141 if (log_to_stderr == -1) {
1142 /* Autodetect default logging output. */
Mike Frysinger056955c2019-09-24 16:07:05 -04001143 log_to_stderr = isatty(STDIN_FILENO) ? 1 : 0;
Mike Frysinger3e6a12c2019-09-24 12:50:55 -04001144 }
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001145 if (log_to_stderr) {
1146 init_logging(LOG_TO_FD, STDERR_FILENO, LOG_INFO);
1147 /*
1148 * When logging to stderr, ensure the FD survives the jailing.
1149 */
1150 if (0 !=
1151 minijail_preserve_fd(j, STDERR_FILENO, STDERR_FILENO)) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001152 errx(1, "Could not preserve stderr");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001153 }
1154 }
1155
1156 /* Set up uid/gid mapping. */
1157 if (set_uidmap || set_gidmap) {
1158 set_ugid_mapping(j, set_uidmap, uid, uidmap, set_gidmap, gid,
1159 gidmap);
1160 }
1161
1162 /* Can only set ambient caps when using regular caps. */
1163 if (ambient_caps && !caps) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001164 errx(1, "Can't set ambient capabilities (--ambient) "
1165 "without actually using capabilities (-c)");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001166 }
1167
1168 /* Set up signal handlers in minijail unless asked not to. */
1169 if (forward)
1170 minijail_forward_signals(j);
1171
1172 /*
1173 * Only allow bind mounts when entering a chroot, using pivot_root, or
1174 * a new mount namespace.
1175 */
1176 if (binding && !(chroot || pivot_root || mount_ns)) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001177 errx(1, "Bind mounts require a chroot, pivot_root, or "
1178 " new mount namespace");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001179 }
1180
1181 /*
Jorge Lucangeli Obes93418062019-09-27 10:59:45 -04001182 * / is only remounted when entering a new mount namespace, so unless
1183 * that's set there is no need for the -K/-K<mode> flags.
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001184 */
Jorge Lucangeli Obes93418062019-09-27 10:59:45 -04001185 if (change_remount && !mount_ns) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001186 errx(1, "No need to use -K (skip remounting '/') or "
Mike Frysingerdac801f2022-01-14 13:31:11 -05001187 "-K<mode> (remount '/' as <mode>) "
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001188 "without -v (new mount namespace).\n"
Mike Frysingerdac801f2022-01-14 13:31:11 -05001189 "Do you need to add '-v' explicitly?");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001190 }
1191
Jorge Lucangeli Obes9e1ac372020-01-23 14:36:50 -05001192 /* Configure the remount flag here to avoid having -v override it. */
1193 if (change_remount) {
1194 if (remount_mode != NULL) {
1195 set_remount_mode(j, remount_mode);
1196 } else {
1197 minijail_skip_remount_private(j);
1198 }
1199 }
1200
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001201 /*
Stéphane Lesimple8d7174b2020-02-07 20:51:08 +01001202 * Proceed in setting the supplementary gids specified on the
1203 * cmdline options.
1204 */
1205 if (suppl_gids_count) {
1206 minijail_set_supplementary_gids(j, suppl_gids_count,
Zi Linfdc98d62022-01-19 22:10:29 +00001207 suppl_gids);
Stéphane Lesimple8d7174b2020-02-07 20:51:08 +01001208 free(suppl_gids);
1209 }
1210
1211 /*
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001212 * We parse seccomp filters here to make sure we've collected all
1213 * cmdline options.
1214 */
1215 if (use_seccomp_filter) {
1216 minijail_parse_seccomp_filters(j, filter_path);
Luis Hector Chavezc3e17722018-10-16 20:43:12 -07001217 } else if (use_seccomp_filter_binary) {
1218 struct sock_fprog filter;
1219 read_seccomp_filter(filter_path, &filter);
1220 minijail_set_seccomp_filters(j, &filter);
1221 free((void *)filter.filter);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001222 }
1223
1224 /* Mount a tmpfs under /tmp and set its size. */
1225 if (tmp_size)
1226 minijail_mount_tmp_size(j, tmp_size);
1227
1228 /*
Stéphane Lesimplef65da3a2022-01-11 11:44:47 +01001229 * Copy our current env to the child if its |*envp| has not
1230 * already been initialized from --env-(reset|add) usage.
1231 */
1232 if (!*envp) {
1233 *envp = minijail_copy_env(environ);
1234 if (!*envp)
1235 err(1, "Failed to allocate memory.");
1236 }
1237
1238 /*
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001239 * There should be at least one additional unparsed argument: the
1240 * executable name.
1241 */
1242 if (argc == optind) {
1243 usage(argv[0]);
1244 exit(1);
1245 }
1246
1247 if (*elftype == ELFERROR) {
1248 /*
1249 * -T was not specified.
1250 * Get the path to the program adjusted for changing root.
1251 */
1252 char *program_path =
1253 minijail_get_original_path(j, argv[optind]);
1254
1255 /* Check that we can access the target program. */
1256 if (access(program_path, X_OK)) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001257 errx(1, "Target program '%s' is not accessible",
Zi Lina9e72262022-01-11 03:22:21 +00001258 argv[optind]);
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001259 }
1260
1261 /* Check if target is statically or dynamically linked. */
1262 *elftype = get_elf_linkage(program_path);
1263 free(program_path);
1264 }
1265
1266 /*
1267 * Setting capabilities need either a dynamically-linked binary, or the
1268 * use of ambient capabilities for them to be able to survive an
1269 * execve(2).
1270 */
1271 if (caps && *elftype == ELFSTATIC && !ambient_caps) {
Mike Frysinger68f7ccd2021-11-24 22:06:51 -05001272 errx(1, "Can't run statically-linked binaries with capabilities"
1273 " (-c) without also setting ambient capabilities. "
1274 "Try passing --ambient.");
Mike Frysinger5ef22ca2018-01-20 13:42:10 -05001275 }
1276
1277 return optind;
1278}