blob: ba869f39eaaf78965ed08334c1f48e39798ba0ae [file] [log] [blame]
Dylan Reid837c74a2016-01-22 17:25:21 -08001/* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
Mike Frysinger05e594e2017-01-10 02:11:08 -050010#if USE_device_mapper
11#include <libdevmapper.h>
12#endif
Dylan Reid837c74a2016-01-22 17:25:21 -080013#include <malloc.h>
14#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -070015#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/mount.h>
20#include <sys/stat.h>
21#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070022#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080023#include <unistd.h>
24
Mike Frysinger412dbd22017-01-06 01:50:34 -050025#include <linux/loop.h>
26
Dylan Reid837c74a2016-01-22 17:25:21 -080027#include "container_cgroup.h"
28#include "libcontainer.h"
29#include "libminijail.h"
30
/*
 * Frees |ptr| and resets it to NULL so a stale pointer can be neither freed
 * twice nor dereferenced. |ptr| must be a modifiable lvalue and is evaluated
 * twice, so it must not have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
36
/* Capacity of the argv array built for the setfiles command. */
#define MAX_NUM_SETFILES_ARGS 128
/* Capacity of the rlimits array; Linux defines 15 at the time of writing. */
#define MAX_RLIMITS 32

/* Control node used to ask the kernel for a free loop device. */
static const char loopdev_ctl[] = "/dev/loop-control";
#if USE_device_mapper
/* Directory prefix for device mapper nodes. */
static const char dm_dev_prefix[] = "/dev/mapper/";
#endif

/* Defined later in this file. */
static int container_teardown(struct container *c);
46
/*
 * Replaces the heap string owned by |*dest| with a fresh copy of |src|.
 *
 * On success the previous value of |*dest| is freed and 0 is returned. On
 * failure |*dest| is left untouched and a negative errno value is returned:
 * -EINVAL if |dest| or |src| is NULL, -ENOMEM if the copy cannot be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behavior, so reject it up front. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(*dest);
	*dest = copy;
	return 0;
}
57
/*
 * One filesystem to mount for the container. All strings are heap copies
 * owned by the entry; |data| and |verity| may be NULL.
 */
struct container_mount {
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;
	char *verity;
	int flags;
	int uid;		/* Owner used if the destination is created */
	int gid;		/* Group used if the destination is created */
	int mode;		/* Mode used if the destination is created */
	int mount_in_ns;	/* True if mount should happen in new vfs ns */
	int create;		/* True if target should be created if it doesn't exist */
	int loopback;		/* True if target should be mounted via loopback */
};
73
/* One device node to create for the container. |path| is a heap copy. */
struct container_device {
	char type;		/* 'c' or 'b' for char or block */
	char *path;
	int fs_permissions;
	int major;
	int minor;
	int copy_minor;		/* Copy the minor from existing node, ignores |minor| */
	int uid;
	int gid;
};
84
/* One device cgroup permission entry. */
struct container_cgroup_device {
	int allow;
	char type;
	int major;	/* -1 means all */
	int minor;	/* -1 means all */
	int read;
	int write;
	int modify;
};
94
/* CPU cgroup parameters: shares, CFS quota/period and realtime runtime/period. */
struct container_cpu_cgroup {
	int shares;
	int quota;
	int period;
	int rt_runtime;
	int rt_period;
};
102
/* One resource limit for the contained process: limit type plus cur/max. */
struct container_rlimit {
	int type;
	uint32_t cur;
	uint32_t max;
};
108
Dylan Reid837c74a2016-01-22 17:25:21 -0800109/*
110 * Structure that configures how the container is run.
111 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500112 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -0800113 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700114 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700115 * premounted_runfs - Path to where the container will be run.
116 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -0800117 * program_argv - The program to run and args, e.g. "/sbin/init".
118 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -0700119 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800120 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -0700121 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800122 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
123 * alt_syscall_table - Syscall table to use or NULL if none.
124 * mounts - Filesystems to mount in the new namespace.
125 * num_mounts - Number of above.
126 * devices - Device nodes to create.
127 * num_devices - Number of above.
Dylan Reid4843d6b2017-03-31 18:14:30 -0700128 * cgroup_devices - Device node cgroup permissions.
129 * num_cgroup_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700130 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800131 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700132 * cgroup_parent - Parent dir for cgroup creation
133 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700134 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700135 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800136 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid93fa4602017-06-06 13:39:31 -0700137 * rlimits - Array of rlimits for the contained process.
138 * num_rlimits - The number of elements in `rlimits`.
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700139 * securebits_skip_mask - The mask of securebits to skip when restricting caps.
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700140 * do_init - Whether the container needs an extra process to be run as init.
Dylan Reid837c74a2016-01-22 17:25:21 -0800141 */
142struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500143 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800144 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700145 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700146 char *premounted_runfs;
147 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800148 char **program_argv;
149 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700150 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800151 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700152 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800153 char *gid_map;
154 char *alt_syscall_table;
155 struct container_mount *mounts;
156 size_t num_mounts;
157 struct container_device *devices;
158 size_t num_devices;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700159 struct container_cgroup_device *cgroup_devices;
160 size_t num_cgroup_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700161 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800162 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700163 char *cgroup_parent;
164 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700165 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700166 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800167 int keep_fds_open;
Dylan Reid93fa4602017-06-06 13:39:31 -0700168 struct container_rlimit rlimits[MAX_RLIMITS];
169 int num_rlimits;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700170 int use_capmask;
171 int use_capmask_ambient;
172 uint64_t capmask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700173 uint64_t securebits_skip_mask;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700174 int do_init;
Dylan Reid837c74a2016-01-22 17:25:21 -0800175};
176
177struct container_config *container_config_create()
178{
179 return calloc(1, sizeof(struct container_config));
180}
181
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700182static void container_free_program_args(struct container_config *c)
183{
184 int i;
185
186 if (!c->program_argv)
187 return;
188 for (i = 0; i < c->num_args; ++i) {
189 FREE_AND_NULL(c->program_argv[i]);
190 }
191 FREE_AND_NULL(c->program_argv);
192}
193
194static void container_config_free_mount(struct container_mount *mount)
195{
196 FREE_AND_NULL(mount->name);
197 FREE_AND_NULL(mount->source);
198 FREE_AND_NULL(mount->destination);
199 FREE_AND_NULL(mount->type);
200 FREE_AND_NULL(mount->data);
201}
202
203static void container_config_free_device(struct container_device *device)
204{
205 FREE_AND_NULL(device->path);
206}
207
Dylan Reid837c74a2016-01-22 17:25:21 -0800208void container_config_destroy(struct container_config *c)
209{
210 size_t i;
211
212 if (c == NULL)
213 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700214 FREE_AND_NULL(c->rootfs);
215 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700216 FREE_AND_NULL(c->premounted_runfs);
217 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700218 FREE_AND_NULL(c->uid_map);
219 FREE_AND_NULL(c->gid_map);
220 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800221 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700222 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800223 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700224 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800225 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700226 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800227 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700228 FREE_AND_NULL(c->devices);
Dylan Reida34f8162017-05-10 11:33:11 -0700229 FREE_AND_NULL(c->cgroup_devices);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700230 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700231 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700232 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800233}
234
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500235int container_config_config_root(struct container_config *c,
236 const char *config_root)
237{
238 return strdup_and_free(&c->config_root, config_root);
239}
240
241const char *container_config_get_config_root(const struct container_config *c)
242{
243 return c->config_root;
244}
245
Dylan Reid837c74a2016-01-22 17:25:21 -0800246int container_config_rootfs(struct container_config *c, const char *rootfs)
247{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700248 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800249}
250
Dylan Reid11456722016-05-02 11:24:50 -0700251const char *container_config_get_rootfs(const struct container_config *c)
252{
253 return c->rootfs;
254}
255
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700256void container_config_rootfs_mount_flags(struct container_config *c,
257 unsigned long rootfs_mount_flags)
258{
259 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
260 * simply check against zero later. MS_BIND is also added to avoid
261 * re-mounting the original filesystem, since the rootfs is always
262 * bind-mounted.
263 */
264 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
265}
266
267unsigned long container_config_get_rootfs_mount_flags(
268 const struct container_config *c)
269{
270 return c->rootfs_mount_flags;
271}
272
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700273int container_config_premounted_runfs(struct container_config *c, const char *runfs)
274{
275 return strdup_and_free(&c->premounted_runfs, runfs);
276}
277
278const char *container_config_get_premounted_runfs(const struct container_config *c)
279{
280 return c->premounted_runfs;
281}
282
283int container_config_pid_file(struct container_config *c, const char *path)
284{
285 return strdup_and_free(&c->pid_file_path, path);
286}
287
288const char *container_config_get_pid_file(const struct container_config *c)
289{
290 return c->pid_file_path;
291}
292
Dylan Reid837c74a2016-01-22 17:25:21 -0800293int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800294 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800295{
296 size_t i;
297
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700298 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800299 c->num_args = num_args;
300 c->program_argv = calloc(num_args + 1, sizeof(char *));
301 if (!c->program_argv)
302 return -ENOMEM;
303 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700304 if (strdup_and_free(&c->program_argv[i], argv[i]))
305 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800306 }
307 c->program_argv[num_args] = NULL;
308 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700309
310error_free_return:
311 container_free_program_args(c);
312 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800313}
314
Dylan Reid11456722016-05-02 11:24:50 -0700315size_t container_config_get_num_program_args(const struct container_config *c)
316{
317 return c->num_args;
318}
319
320const char *container_config_get_program_arg(const struct container_config *c,
321 size_t index)
322{
323 if (index >= c->num_args)
324 return NULL;
325 return c->program_argv[index];
326}
327
Dylan Reid1874feb2016-06-22 17:53:50 -0700328void container_config_uid(struct container_config *c, uid_t uid)
329{
330 c->uid = uid;
331}
332
333uid_t container_config_get_uid(const struct container_config *c)
334{
335 return c->uid;
336}
337
Dylan Reid837c74a2016-01-22 17:25:21 -0800338int container_config_uid_map(struct container_config *c, const char *uid_map)
339{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700340 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800341}
342
Dylan Reid1874feb2016-06-22 17:53:50 -0700343void container_config_gid(struct container_config *c, gid_t gid)
344{
345 c->gid = gid;
346}
347
348gid_t container_config_get_gid(const struct container_config *c)
349{
350 return c->gid;
351}
352
Dylan Reid837c74a2016-01-22 17:25:21 -0800353int container_config_gid_map(struct container_config *c, const char *gid_map)
354{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700355 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800356}
357
358int container_config_alt_syscall_table(struct container_config *c,
359 const char *alt_syscall_table)
360{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700361 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800362}
363
Dylan Reid93fa4602017-06-06 13:39:31 -0700364int container_config_add_rlimit(struct container_config *c, int type,
365 uint32_t cur, uint32_t max)
366{
367 if (c->num_rlimits >= MAX_RLIMITS) {
368 return -ENOMEM;
369 }
370 c->rlimits[c->num_rlimits].type = type;
371 c->rlimits[c->num_rlimits].cur = cur;
372 c->rlimits[c->num_rlimits].max = max;
373 c->num_rlimits++;
374 return 0;
375}
376
Dylan Reid837c74a2016-01-22 17:25:21 -0800377int container_config_add_mount(struct container_config *c,
378 const char *name,
379 const char *source,
380 const char *destination,
381 const char *type,
382 const char *data,
Mike Frysinger05e594e2017-01-10 02:11:08 -0500383 const char *verity,
Dylan Reid837c74a2016-01-22 17:25:21 -0800384 int flags,
385 int uid,
386 int gid,
387 int mode,
388 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500389 int create,
390 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800391{
392 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700393 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800394
395 if (name == NULL || source == NULL ||
396 destination == NULL || type == NULL)
397 return -EINVAL;
398
399 mount_ptr = realloc(c->mounts,
400 sizeof(c->mounts[0]) * (c->num_mounts + 1));
401 if (!mount_ptr)
402 return -ENOMEM;
403 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700404 current_mount = &c->mounts[c->num_mounts];
405 memset(current_mount, 0, sizeof(struct container_mount));
406
407 if (strdup_and_free(&current_mount->name, name))
408 goto error_free_return;
409 if (strdup_and_free(&current_mount->source, source))
410 goto error_free_return;
411 if (strdup_and_free(&current_mount->destination, destination))
412 goto error_free_return;
413 if (strdup_and_free(&current_mount->type, type))
414 goto error_free_return;
415 if (data && strdup_and_free(&current_mount->data, data))
416 goto error_free_return;
Mike Frysinger05e594e2017-01-10 02:11:08 -0500417 if (verity && strdup_and_free(&current_mount->verity, verity))
418 goto error_free_return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700419 current_mount->flags = flags;
420 current_mount->uid = uid;
421 current_mount->gid = gid;
422 current_mount->mode = mode;
423 current_mount->mount_in_ns = mount_in_ns;
424 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500425 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800426 ++c->num_mounts;
427 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700428
429error_free_return:
430 container_config_free_mount(current_mount);
431 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800432}
433
Dylan Reid4843d6b2017-03-31 18:14:30 -0700434int container_config_add_cgroup_device(struct container_config *c,
435 int allow,
436 char type,
437 int major,
438 int minor,
439 int read,
440 int write,
441 int modify)
442{
443 struct container_cgroup_device *dev_ptr;
444 struct container_cgroup_device *current_dev;
445
446 dev_ptr = realloc(c->cgroup_devices,
447 sizeof(c->cgroup_devices[0]) *
448 (c->num_cgroup_devices + 1));
449 if (!dev_ptr)
450 return -ENOMEM;
451 c->cgroup_devices = dev_ptr;
452
453 current_dev = &c->cgroup_devices[c->num_cgroup_devices];
454 memset(current_dev, 0, sizeof(struct container_cgroup_device));
455 current_dev->allow = allow;
456 current_dev->type = type;
457 current_dev->major = major;
458 current_dev->minor = minor;
459 current_dev->read = read;
460 current_dev->write = write;
461 current_dev->modify = modify;
462 ++c->num_cgroup_devices;
463
464 return 0;
465}
466
Dylan Reid837c74a2016-01-22 17:25:21 -0800467int container_config_add_device(struct container_config *c,
468 char type,
469 const char *path,
470 int fs_permissions,
471 int major,
472 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700473 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800474 int uid,
475 int gid,
476 int read_allowed,
477 int write_allowed,
478 int modify_allowed)
479{
480 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700481 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800482
483 if (path == NULL)
484 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700485 /* If using a dynamic minor number, ensure that minor is -1. */
486 if (copy_minor && (minor != -1))
487 return -EINVAL;
488
Dylan Reid837c74a2016-01-22 17:25:21 -0800489 dev_ptr = realloc(c->devices,
490 sizeof(c->devices[0]) * (c->num_devices + 1));
491 if (!dev_ptr)
492 return -ENOMEM;
493 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700494 current_dev = &c->devices[c->num_devices];
495 memset(current_dev, 0, sizeof(struct container_device));
496
497 current_dev->type = type;
498 if (strdup_and_free(&current_dev->path, path))
499 goto error_free_return;
500 current_dev->fs_permissions = fs_permissions;
501 current_dev->major = major;
502 current_dev->minor = minor;
503 current_dev->copy_minor = copy_minor;
504 current_dev->uid = uid;
505 current_dev->gid = gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700506 if (read_allowed || write_allowed || modify_allowed) {
507 if (container_config_add_cgroup_device(c,
508 1,
509 type,
510 major,
511 minor,
512 read_allowed,
513 write_allowed,
514 modify_allowed))
515 goto error_free_return;
516 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800517 ++c->num_devices;
518 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700519
520error_free_return:
521 container_config_free_device(current_dev);
522 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800523}
524
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700525int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700526 const char *setfiles_cmd)
527{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700528 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700529}
Dylan Reid837c74a2016-01-22 17:25:21 -0800530
Dylan Reid11456722016-05-02 11:24:50 -0700531const char *container_config_get_run_setfiles(const struct container_config *c)
532{
533 return c->run_setfiles;
534}
535
Chinyue Chenfac909e2016-06-24 14:17:42 +0800536int container_config_set_cpu_shares(struct container_config *c, int shares)
537{
538 /* CPU shares must be 2 or higher. */
539 if (shares < 2)
540 return -EINVAL;
541
542 c->cpu_cgparams.shares = shares;
543 return 0;
544}
545
546int container_config_set_cpu_cfs_params(struct container_config *c,
547 int quota,
548 int period)
549{
550 /*
551 * quota could be set higher than period to utilize more than one CPU.
552 * quota could also be set as -1 to indicate the cgroup does not adhere
553 * to any CPU time restrictions.
554 */
555 if (quota <= 0 && quota != -1)
556 return -EINVAL;
557 if (period <= 0)
558 return -EINVAL;
559
560 c->cpu_cgparams.quota = quota;
561 c->cpu_cgparams.period = period;
562 return 0;
563}
564
565int container_config_set_cpu_rt_params(struct container_config *c,
566 int rt_runtime,
567 int rt_period)
568{
569 /*
570 * rt_runtime could be set as 0 to prevent the cgroup from using
571 * realtime CPU.
572 */
573 if (rt_runtime < 0 || rt_runtime >= rt_period)
574 return -EINVAL;
575
576 c->cpu_cgparams.rt_runtime = rt_runtime;
577 c->cpu_cgparams.rt_period = rt_period;
578 return 0;
579}
580
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800581int container_config_get_cpu_shares(struct container_config *c)
582{
583 return c->cpu_cgparams.shares;
584}
585
586int container_config_get_cpu_quota(struct container_config *c)
587{
588 return c->cpu_cgparams.quota;
589}
590
591int container_config_get_cpu_period(struct container_config *c)
592{
593 return c->cpu_cgparams.period;
594}
595
596int container_config_get_cpu_rt_runtime(struct container_config *c)
597{
598 return c->cpu_cgparams.rt_runtime;
599}
600
601int container_config_get_cpu_rt_period(struct container_config *c)
602{
603 return c->cpu_cgparams.rt_period;
604}
605
Dylan Reid9e724af2016-07-21 09:58:07 -0700606int container_config_set_cgroup_parent(struct container_config *c,
607 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700608 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700609{
610 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700611 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700612 return strdup_and_free(&c->cgroup_parent, parent);
613}
614
615const char *container_config_get_cgroup_parent(struct container_config *c)
616{
617 return c->cgroup_parent;
618}
619
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700620void container_config_share_host_netns(struct container_config *c)
621{
622 c->share_host_netns = 1;
623}
624
625int get_container_config_share_host_netns(struct container_config *c)
626{
627 return c->share_host_netns;
628}
629
Dylan Reidc4335842016-11-11 10:24:52 -0800630void container_config_keep_fds_open(struct container_config *c)
631{
632 c->keep_fds_open = 1;
633}
634
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700635void container_config_set_capmask(struct container_config *c,
636 uint64_t capmask,
637 int ambient)
638{
639 c->use_capmask = 1;
640 c->capmask = capmask;
641 c->use_capmask_ambient = ambient;
642}
643
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700644void container_config_set_securebits_skip_mask(struct container_config *c,
645 uint64_t securebits_skip_mask)
646{
647 c->securebits_skip_mask = securebits_skip_mask;
648}
649
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700650void container_config_set_run_as_init(struct container_config *c,
651 int run_as_init)
652{
653 c->do_init = !run_as_init;
654}
655
/*
 * Container manipulation
 */

/* Runtime state of one container instance. */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *config_root;
	char *runfs;
	char *rundir;
	char *runfsroot;
	char *pid_file_path;
	char **ext_mounts;		/* Mounts made outside of the minijail */
	size_t num_ext_mounts;		/* Number of entries in |ext_mounts| */
	char **loopdevs;
	size_t num_loopdevs;		/* Number of entries in |loopdevs| */
	char **device_mappers;
	size_t num_device_mappers;	/* Number of entries in |device_mappers| */
	char *name;
};
676
677struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700678 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800679{
680 struct container *c;
681
Dylan Reid837c74a2016-01-22 17:25:21 -0800682 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700683 if (!c)
684 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800685 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700686 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700687 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700688 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800689 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700690 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800691 return c;
692}
693
694void container_destroy(struct container *c)
695{
Dylan Reid684975e2016-05-02 15:44:47 -0700696 if (c->cgroup)
697 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700698 if (c->jail)
699 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500700 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700701 FREE_AND_NULL(c->name);
702 FREE_AND_NULL(c->rundir);
703 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800704}
705
/*
 * Translates |id| from inside the user namespace to the matching outside id.
 *
 * |map| is a comma-separated list of "inside outside length" triples, e.g.
 * "0 100000 1024, 2000 5000 10". Each triple maps the half-open range
 * [inside, inside + length) onto [outside, outside + length).
 *
 * Returns the outside id on success. When |map| is NULL (no mapping has been
 * configured) |id| is returned unchanged. Returns -EINVAL when the map is
 * malformed, holds a negative or out-of-range number, or has no triple that
 * covers |id|.
 */
static int get_userns_outside_id(const char *map, int id)
{
	const char *cursor = map;

	if (!map)
		return id;

	while (*cursor != '\0') {
		long fields[3];	/* inside, outside, length */
		size_t i;

		for (i = 0; i < 3; ++i) {
			char *end;

			errno = 0;
			fields[i] = strtol(cursor, &end, 10);
			/* No digits consumed, overflow, negative, or not an int. */
			if (end == cursor || errno == ERANGE ||
			    fields[i] < 0 ||
			    fields[i] != (long)(int)fields[i])
				return -EINVAL;
			cursor = end;
		}

		/*
		 * A mapping of |length| ids covers inside..inside+length-1,
		 * so the upper bound is exclusive.
		 */
		if (id >= fields[0] && (long)id - fields[0] < fields[2]) {
			long long mapped =
				(long long)fields[1] + ((long)id - fields[0]);

			if (mapped != (long long)(int)mapped)
				return -EINVAL;
			return (int)mapped;
		}

		/* Only whitespace and one ',' may separate two triples. */
		while (*cursor == ' ' || *cursor == '\t' ||
		       *cursor == '\n' || *cursor == '\r')
			++cursor;
		if (*cursor == ',')
			++cursor;
		else if (*cursor != '\0')
			return -EINVAL;
	}

	return -EINVAL;
}
748
/*
 * Creates the directory |path|, then explicitly applies |mode| with chmod
 * and hands it to |uid|:|gid|. Returns 0 on success or the negative errno
 * of the first step that fails.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* Short-circuit evaluation stops at the first failing call. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
759
/*
 * Creates |path| if it does not exist (opened with |mode|) and changes its
 * owner to |uid|:|gid|. Returns 0 on success, or the negative errno of the
 * failing open()/fchown() call.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Capture errno before close(), which may overwrite it. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);
	return rc;
}
773
774/* Make sure the mount target exists in the new rootfs. Create if needed and
775 * possible.
776 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800777static int setup_mount_destination(const struct container_config *config,
778 const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700779 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800780 const char *dest)
781{
Stephen Barber1a398c72017-01-23 12:39:44 -0800782 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -0800783 int rc;
784 struct stat st_buf;
785
786 rc = stat(dest, &st_buf);
787 if (rc == 0) /* destination exists */
788 return 0;
789
790 /* Try to create the destination. Either make directory or touch a file
791 * depending on the source type.
792 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800793 uid_userns = get_userns_outside_id(config->uid_map, mnt->uid);
794 if (uid_userns < 0)
795 return uid_userns;
796 gid_userns = get_userns_outside_id(config->gid_map, mnt->gid);
797 if (gid_userns < 0)
798 return gid_userns;
799
Dylan Reid2149be92016-04-28 18:38:57 -0700800 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800801 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
Stephen Barber1a398c72017-01-23 12:39:44 -0800802 return make_dir(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800803
Stephen Barber1a398c72017-01-23 12:39:44 -0800804 return touch_file(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800805}
806
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700807/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700808static int run_setfiles_command(const struct container *c,
809 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800810 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700811{
812 int rc;
813 int status;
814 int pid;
815 char *context_path;
816
Dylan Reide040c6b2016-05-02 18:49:02 -0700817 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700818 return 0;
819
820 if (asprintf(&context_path, "%s/file_contexts",
821 c->runfsroot) < 0)
822 return -errno;
823
824 pid = fork();
825 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800826 size_t i;
827 size_t arg_index = 0;
828 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700829 const char *env[] = {
830 NULL,
831 };
832
Yusuke Sato91f11f02016-12-02 16:15:13 -0800833 argv[arg_index++] = config->run_setfiles;
834 argv[arg_index++] = "-r";
835 argv[arg_index++] = c->runfsroot;
836 argv[arg_index++] = context_path;
837 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
838 _exit(-E2BIG);
839 for (i = 0; i < num_destinations; ++i) {
840 argv[arg_index++] = destinations[i];
841 }
842 argv[arg_index] = NULL;
843
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700844 execve(argv[0], (char *const*)argv, (char *const*)env);
845
846 /* Command failed to exec if execve returns. */
847 _exit(-errno);
848 }
849 free(context_path);
850 if (pid < 0)
851 return -errno;
852 do {
853 rc = waitpid(pid, &status, 0);
854 } while (rc == -1 && errno == EINTR);
855 if (rc < 0)
856 return -errno;
857 return status;
858}
859
Mike Frysinger412dbd22017-01-06 01:50:34 -0500860/* Find a free loop device and attach it. */
861static int loopdev_setup(char **loopdev_ret, const char *source)
862{
863 int ret = 0;
864 int source_fd = -1;
865 int control_fd = -1;
866 int loop_fd = -1;
867 char *loopdev = NULL;
868
869 source_fd = open(source, O_RDONLY|O_CLOEXEC);
870 if (source_fd < 0)
871 goto error;
872
873 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
874 if (control_fd < 0)
875 goto error;
876
877 while (1) {
878 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
879 if (num < 0)
880 goto error;
881
882 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
883 goto error;
884
885 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
886 if (loop_fd < 0)
887 goto error;
888
889 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
890 break;
891
892 if (errno != EBUSY)
893 goto error;
894
895 /* Clean up resources for the next pass. */
896 free(loopdev);
897 close(loop_fd);
898 }
899
900 *loopdev_ret = loopdev;
901 goto exit;
902
903error:
904 ret = -errno;
905 free(loopdev);
906exit:
907 if (source_fd != -1)
908 close(source_fd);
909 if (control_fd != -1)
910 close(control_fd);
911 if (loop_fd != -1)
912 close(loop_fd);
913 return ret;
914}
915
/*
 * Detach the specified loop device.
 *
 * Opens |loopdev| and issues LOOP_CLR_FD on it.  Returns 0 on success or a
 * negative errno value if the device could not be opened or cleared.
 */
static int loopdev_detach(const char *loopdev)
{
	int status = 0;
	const int dev_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);

	/* Nothing to close when the open itself failed. */
	if (dev_fd < 0)
		return -errno;

	if (ioctl(dev_fd, LOOP_CLR_FD) < 0)
		status = -errno;

	close(dev_fd);
	return status;
}
937
/*
 * Create a new device mapper target for the source.
 *
 * The target is named "cros-containers-<source>" with every '/' replaced by
 * '_'.  Each "@DEV@" token in |verity_cmdline| is replaced by |source|; the
 * first three fields of the result (start, size, target type) are parsed and
 * the remainder is passed to the target as its parameters.
 *
 * On success returns 0 and stores two malloc()ed strings owned by the
 * caller: the /dev path of the new node in *dm_path_ret and the target name
 * in *dm_name_ret.  On failure returns a negative errno value.
 *
 * When built without USE_device_mapper no device can be created, so the
 * outputs are set to NULL and -ENOTSUP is returned rather than reporting
 * success with nothing to mount.
 */
static int dm_setup(char **dm_path_ret, char **dm_name_ret, const char *source,
		    const char *verity_cmdline)
{
#if USE_device_mapper
	static const char dev_token[] = "@DEV@";
	const size_t token_len = sizeof(dev_token) - 1;
	const size_t source_len = strlen(source);
	int ret = 0;
	size_t num_tokens = 0;
	const char *in;
	const char *hit;
	char *out;
	char *p;
	char *dm_path = NULL;
	char *dm_name = NULL;
	char *verity = NULL;
	struct dm_task *dmt = NULL;
	uint32_t cookie = 0;
	char ttype[20];
	unsigned long long start, size;
	int n = 0;

	/* Normalize the name into something unique-esque. */
	if (asprintf(&dm_name, "cros-containers-%s", source) < 0) {
		dm_name = NULL;
		goto error;
	}
	for (p = dm_name; (p = strchr(p, '/')) != NULL; p++)
		*p = '_';

	/* Get the /dev path for the higher levels to mount. */
	if (asprintf(&dm_path, "%s%s", dm_dev_prefix, dm_name) < 0) {
		dm_path = NULL;
		goto error;
	}

	/*
	 * Insert the source path in the verity command line.  Count the
	 * tokens first so the buffer is large enough for any number of them,
	 * then build the result in a single left-to-right pass so text that
	 * was just substituted is never rescanned.
	 */
	for (in = verity_cmdline; (hit = strstr(in, dev_token)) != NULL;
	     in = hit + token_len)
		num_tokens++;
	verity = malloc(strlen(verity_cmdline) + num_tokens * source_len + 1);
	if (!verity) {
		errno = ENOMEM;
		goto error;
	}
	out = verity;
	for (in = verity_cmdline; (hit = strstr(in, dev_token)) != NULL;
	     in = hit + token_len) {
		memcpy(out, in, (size_t)(hit - in));
		out += hit - in;
		memcpy(out, source, source_len);
		out += source_len;
	}
	strcpy(out, in);

	/* Extract the first three parameters for dm-verity settings. */
	if (sscanf(verity, "%llu %llu %10s %n", &start, &size, ttype, &n) != 3) {
		/* sscanf() does not set errno for a plain mismatch. */
		errno = EINVAL;
		goto error;
	}

	/* Finally create the device mapper. */
	dmt = dm_task_create(DM_DEVICE_CREATE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_set_ro(dmt))
		goto error;

	if (!dm_task_add_target(dmt, start, size, ttype, verity + n))
		goto error;

	if (!dm_task_set_cookie(dmt, &cookie, 0))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	/* Make sure the node exists before we continue. */
	dm_udev_wait(cookie);

	*dm_path_ret = dm_path;
	*dm_name_ret = dm_name;
	goto exit;

error:
	/*
	 * errno is only a best-effort error code for the libdevmapper calls;
	 * fall back to -EIO so a failure is never reported as success.
	 */
	ret = errno ? -errno : -EIO;
	free(dm_name);
	free(dm_path);
exit:
	free(verity);
	if (dmt)
		dm_task_destroy(dmt);
	return ret;
#else
	(void)source;
	(void)verity_cmdline;
	*dm_path_ret = NULL;
	*dm_name_ret = NULL;
	return -ENOTSUP;
#endif
}
1016
/*
 * Tear down the device mapper target.
 *
 * Removes the target called |dm_name|.  Returns 0 on success or a negative
 * errno value on failure.  When built without USE_device_mapper this does
 * nothing and returns 0.
 */
static int dm_detach(const char *dm_name)
{
	int ret = 0;
#if USE_device_mapper
	struct dm_task *dmt;

	dmt = dm_task_create(DM_DEVICE_REMOVE);
	if (dmt == NULL) {
		/* There is no task to destroy, so do not fall into cleanup. */
		return errno ? -errno : -EIO;
	}

	/*
	 * errno is only a best-effort error code for these calls; fall back
	 * to -EIO so a failure is never reported as success.
	 */
	if (!dm_task_set_name(dmt, dm_name) || !dm_task_run(dmt))
		ret = errno ? -errno : -EIO;

	dm_task_destroy(dmt);
#else
	(void)dm_name;
#endif
	return ret;
}
1043
Dylan Reide040c6b2016-05-02 18:49:02 -07001044/*
1045 * Unmounts anything we mounted in this mount namespace in the opposite order
1046 * that they were mounted.
1047 */
1048static int unmount_external_mounts(struct container *c)
1049{
1050 int ret = 0;
1051
1052 while (c->num_ext_mounts) {
1053 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001054 if (!c->ext_mounts[c->num_ext_mounts])
1055 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -07001056 if (umount(c->ext_mounts[c->num_ext_mounts]))
1057 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001058 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -07001059 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001060 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001061
1062 while (c->num_loopdevs) {
1063 c->num_loopdevs--;
1064 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
1065 ret = -errno;
1066 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
1067 }
1068 FREE_AND_NULL(c->loopdevs);
1069
Mike Frysinger05e594e2017-01-10 02:11:08 -05001070 while (c->num_device_mappers) {
1071 c->num_device_mappers--;
1072 if (dm_detach(c->device_mappers[c->num_device_mappers]))
1073 ret = -errno;
1074 FREE_AND_NULL(c->device_mappers[c->num_device_mappers]);
1075 }
1076 FREE_AND_NULL(c->device_mappers);
1077
Dylan Reide040c6b2016-05-02 18:49:02 -07001078 return ret;
1079}
1080
/*
 * Match mount_one in minijail: mount one mountpoint, taking the
 * MS_BIND/MS_RDONLY combination into account.
 *
 * 'bind' and 'ro' can't both be specified in the original bind mount, so a
 * read-only bind mount is first mounted without MS_RDONLY and then remounted
 * with it.  Returns 0 on success, or -1 (with errno left as set by mount())
 * if either mount call fails.
 */
static int mount_external(const char *src, const char *dest, const char *type,
			  unsigned long flags, const void *data)
{
	const int ro_bind = (flags & MS_BIND) && (flags & MS_RDONLY);
	const unsigned long initial_flags =
		ro_bind ? (flags & ~MS_RDONLY) : flags;

	if (mount(src, dest, type, initial_flags, data) == -1)
		return -1;

	if (!ro_bind)
		return 0;

	/* Second pass: apply the read-only flag that was held back. */
	if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1)
		return -1;

	return 0;
}
1111
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001112static int do_container_mount(struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001113 const struct container_config *config,
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001114 const struct container_mount *mnt)
1115{
Mike Frysinger05e594e2017-01-10 02:11:08 -05001116 char *dm_source = NULL;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001117 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001118 char *source = NULL;
1119 char *dest = NULL;
1120 int rc = 0;
1121
1122 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
1123 return -errno;
1124
1125 /*
1126 * If it's a bind mount relative to rootfs, append source to
1127 * rootfs path, otherwise source path is absolute.
1128 */
1129 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
1130 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
1131 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001132 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
1133 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
1134 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001135 } else {
1136 if (asprintf(&source, "%s", mnt->source) < 0)
1137 goto error_free_return;
1138 }
1139
Dylan Reidbd5234c2017-06-06 21:20:07 -07001140 // Only create the destinations for external mounts, minijail will take
1141 // care of those mounted in the new namespace.
1142 if (mnt->create && !mnt->mount_in_ns) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001143 rc = setup_mount_destination(config, mnt, source, dest);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001144 if (rc)
1145 goto error_free_return;
1146 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001147 if (mnt->loopback) {
1148 /* Record this loopback file for cleanup later. */
1149 loop_source = source;
1150 source = NULL;
1151 rc = loopdev_setup(&source, loop_source);
1152 if (rc)
1153 goto error_free_return;
1154
Mike Frysinger05e594e2017-01-10 02:11:08 -05001155 /* Save this to cleanup when shutting down. */
Mike Frysinger412dbd22017-01-06 01:50:34 -05001156 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
1157 if (rc)
1158 goto error_free_return;
1159 c->num_loopdevs++;
1160 }
Mike Frysinger05e594e2017-01-10 02:11:08 -05001161 if (mnt->verity) {
1162 /* Set this device up via dm-verity. */
1163 char *dm_name;
1164 dm_source = source;
1165 source = NULL;
1166 rc = dm_setup(&source, &dm_name, dm_source, mnt->verity);
1167 if (rc)
1168 goto error_free_return;
1169
1170 /* Save this to cleanup when shutting down. */
1171 rc = strdup_and_free(&c->device_mappers[c->num_device_mappers],
1172 dm_name);
1173 free(dm_name);
1174 if (rc)
1175 goto error_free_return;
1176 c->num_device_mappers++;
1177 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001178 if (mnt->mount_in_ns) {
1179 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -07001180 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
1181 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001182 if (rc)
1183 goto error_free_return;
1184 } else {
1185 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +09001186 if (mount_external(source, dest, mnt->type, mnt->flags,
1187 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001188 goto error_free_return;
1189 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001190 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
1191 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001192 goto error_free_return;
1193 c->num_ext_mounts++;
1194 }
1195
1196 goto exit;
1197
1198error_free_return:
1199 if (!rc)
1200 rc = -errno;
1201exit:
Mike Frysinger05e594e2017-01-10 02:11:08 -05001202 free(dm_source);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001203 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001204 free(source);
1205 free(dest);
1206 return rc;
1207}
1208
Dylan Reide040c6b2016-05-02 18:49:02 -07001209static int do_container_mounts(struct container *c,
1210 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -07001211{
1212 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001213 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -07001214
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001215 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -07001216 /*
1217 * Allocate space to track anything we mount in our mount namespace.
1218 * This over-allocates as it has space for all mounts.
1219 */
1220 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
1221 if (!c->ext_mounts)
1222 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001223 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
1224 if (!c->loopdevs)
1225 return -errno;
Mike Frysinger05e594e2017-01-10 02:11:08 -05001226 c->device_mappers = calloc(config->num_mounts, sizeof(*c->device_mappers));
1227 if (!c->device_mappers)
1228 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -07001229
1230 for (i = 0; i < config->num_mounts; ++i) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001231 rc = do_container_mount(c, config, &config->mounts[i]);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001232 if (rc)
1233 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -07001234 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001235
Dylan Reid7daf9982016-04-28 16:55:42 -07001236 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -07001237
1238error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -07001239 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001240 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -07001241}
1242
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001243static int container_create_device(const struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001244 const struct container_config *config,
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001245 const struct container_device *dev,
1246 int minor)
1247{
1248 char *path = NULL;
1249 int rc = 0;
1250 int mode;
Stephen Barber1a398c72017-01-23 12:39:44 -08001251 int uid_userns, gid_userns;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001252
1253 switch (dev->type) {
1254 case 'b':
1255 mode = S_IFBLK;
1256 break;
1257 case 'c':
1258 mode = S_IFCHR;
1259 break;
1260 default:
1261 return -EINVAL;
1262 }
1263 mode |= dev->fs_permissions;
1264
Stephen Barber1a398c72017-01-23 12:39:44 -08001265 uid_userns = get_userns_outside_id(config->uid_map, dev->uid);
1266 if (uid_userns < 0)
1267 return uid_userns;
1268 gid_userns = get_userns_outside_id(config->gid_map, dev->gid);
1269 if (gid_userns < 0)
1270 return gid_userns;
1271
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001272 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
1273 goto error_free_return;
1274 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
1275 goto error_free_return;
Stephen Barber1a398c72017-01-23 12:39:44 -08001276 if (chown(path, uid_userns, gid_userns))
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001277 goto error_free_return;
1278 if (chmod(path, dev->fs_permissions))
1279 goto error_free_return;
1280
1281 goto exit;
1282
1283error_free_return:
1284 rc = -errno;
1285exit:
1286 free(path);
1287 return rc;
1288}
1289
Stephen Barber1a398c72017-01-23 12:39:44 -08001290
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001291static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -08001292{
Dylan Reidb3621832016-03-24 10:24:57 -07001293 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -07001294 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001295 char *runfs_template = NULL;
Stephen Barber1a398c72017-01-23 12:39:44 -08001296 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -08001297
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001298 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
1299 return -ENOMEM;
1300
1301 c->runfs = mkdtemp(runfs_template);
1302 if (!c->runfs) {
1303 free(runfs_template);
1304 return -errno;
1305 }
1306
Stephen Barber1a398c72017-01-23 12:39:44 -08001307 uid_userns = get_userns_outside_id(config->uid_map, config->uid);
1308 if (uid_userns < 0)
1309 return uid_userns;
1310 gid_userns = get_userns_outside_id(config->gid_map, config->gid);
1311 if (gid_userns < 0)
1312 return gid_userns;
1313
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001314 /* Make sure the container uid can access the rootfs. */
1315 if (chmod(c->runfs, 0700))
1316 return -errno;
Stephen Barber1a398c72017-01-23 12:39:44 -08001317 if (chown(c->runfs, uid_userns, gid_userns))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001318 return -errno;
1319
1320 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
1321 return -errno;
1322
1323 if (mkdir(c->runfsroot, root_dir_mode))
1324 return -errno;
1325 if (chmod(c->runfsroot, root_dir_mode))
1326 return -errno;
1327
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001328 if (mount(rootfs, c->runfsroot, "", MS_BIND, NULL))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001329 return -errno;
1330
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001331 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1332 * second call to mount() to actually set them.
1333 */
1334 if (config->rootfs_mount_flags &&
1335 mount(rootfs, c->runfsroot, "",
1336 config->rootfs_mount_flags, NULL)) {
1337 return -errno;
1338 }
1339
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001340 return 0;
1341}
1342
Dylan Reidacedff92017-03-31 17:41:40 -07001343static int device_setup(struct container *c,
1344 const struct container_config *config)
1345{
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001346 int rc;
1347 size_t i;
Dylan Reidacedff92017-03-31 17:41:40 -07001348
1349 c->cgroup->ops->deny_all_devices(c->cgroup);
1350
Dylan Reid4843d6b2017-03-31 18:14:30 -07001351 for (i = 0; i < config->num_cgroup_devices; i++) {
1352 const struct container_cgroup_device *dev =
1353 &config->cgroup_devices[i];
1354 rc = c->cgroup->ops->add_device(c->cgroup,
1355 dev->allow,
1356 dev->major,
1357 dev->minor,
1358 dev->read,
1359 dev->write,
1360 dev->modify,
1361 dev->type);
1362 if (rc)
1363 return rc;
1364 }
1365
Dylan Reidacedff92017-03-31 17:41:40 -07001366 for (i = 0; i < config->num_devices; i++) {
1367 const struct container_device *dev = &config->devices[i];
1368 int minor = dev->minor;
1369
1370 if (dev->copy_minor) {
1371 struct stat st_buff;
1372 if (stat(dev->path, &st_buff) < 0)
1373 continue;
1374 minor = minor(st_buff.st_rdev);
1375 }
1376 if (minor >= 0) {
1377 rc = container_create_device(c, config, dev, minor);
1378 if (rc)
1379 return rc;
1380 }
Dylan Reidacedff92017-03-31 17:41:40 -07001381 }
1382
1383 for (i = 0; i < c->num_loopdevs; ++i) {
1384 struct stat st;
1385
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001386 rc = stat(c->loopdevs[i], &st);
1387 if (rc < 0)
1388 return -errno;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001389 rc = c->cgroup->ops->add_device(c->cgroup, 1, major(st.st_rdev),
Dylan Reidacedff92017-03-31 17:41:40 -07001390 minor(st.st_rdev),
1391 1, 0, 0, 'b');
1392 if (rc)
1393 return rc;
1394 }
1395
1396 return 0;
1397}
1398
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001399int container_start(struct container *c, const struct container_config *config)
1400{
1401 int rc = 0;
1402 unsigned int i;
Stephen Barber1a398c72017-01-23 12:39:44 -08001403 int cgroup_uid, cgroup_gid;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001404 char **destinations;
1405 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001406
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001407 if (!c)
1408 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001409 if (!config)
1410 return -EINVAL;
1411 if (!config->program_argv || !config->program_argv[0])
1412 return -EINVAL;
1413
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001414 if (config->config_root) {
1415 c->config_root = strdup(config->config_root);
1416 if (!c->config_root) {
1417 rc = -ENOMEM;
1418 goto error_rmdir;
1419 }
1420 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001421 if (config->premounted_runfs) {
1422 c->runfs = NULL;
1423 c->runfsroot = strdup(config->premounted_runfs);
1424 if (!c->runfsroot) {
1425 rc = -ENOMEM;
1426 goto error_rmdir;
1427 }
1428 } else {
1429 rc = mount_runfs(c, config);
1430 if (rc)
1431 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001432 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001433
1434 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001435 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001436 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001437
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001438 rc = do_container_mounts(c, config);
1439 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001440 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001441
Stephen Barber1a398c72017-01-23 12:39:44 -08001442 cgroup_uid = get_userns_outside_id(config->uid_map,
1443 config->cgroup_owner);
1444 if (cgroup_uid < 0) {
1445 rc = cgroup_uid;
1446 goto error_rmdir;
1447 }
1448 cgroup_gid = get_userns_outside_id(config->gid_map,
1449 config->cgroup_group);
1450 if (cgroup_gid < 0) {
1451 rc = cgroup_gid;
1452 goto error_rmdir;
1453 }
1454
Dylan Reida9966422016-07-21 10:11:34 -07001455 c->cgroup = container_cgroup_new(c->name,
1456 "/sys/fs/cgroup",
1457 config->cgroup_parent,
Stephen Barber1a398c72017-01-23 12:39:44 -08001458 cgroup_uid,
1459 cgroup_gid);
Dylan Reida9966422016-07-21 10:11:34 -07001460 if (!c->cgroup)
1461 goto error_rmdir;
1462
Keshav Santhanam268fa032016-07-14 09:59:24 -07001463 /* Must be root to modify device cgroup or mknod */
1464 if (getuid() == 0) {
Dylan Reidacedff92017-03-31 17:41:40 -07001465 if (device_setup(c, config))
1466 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001467 }
1468
Dylan Reidd7229582016-04-27 17:08:40 -07001469 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001470 destinations = calloc(config->num_mounts, sizeof(char *));
1471 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001472 for (i = 0; i < config->num_mounts; i++) {
1473 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001474 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001475
1476 if (mnt->mount_in_ns)
1477 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001478 if (mnt->flags & MS_RDONLY)
1479 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001480
Yusuke Satod33db432016-12-05 16:24:37 -08001481 /* A hack to avoid setfiles on /data and /cache. */
1482 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001483 continue;
1484
1485 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1486 size_t j;
1487 for (j = 0; j < num_destinations; ++j) {
1488 free(destinations[j]);
1489 }
1490 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001491 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001492 }
1493
1494 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001495 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001496 if (num_destinations) {
1497 size_t i;
1498 rc = run_setfiles_command(c, config, destinations, num_destinations);
1499 for (i = 0; i < num_destinations; ++i) {
1500 free(destinations[i]);
1501 }
1502 }
1503 free(destinations);
1504 if (rc)
1505 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001506
Chinyue Chenfac909e2016-06-24 14:17:42 +08001507 /* Setup CPU cgroup params. */
1508 if (config->cpu_cgparams.shares) {
1509 rc = c->cgroup->ops->set_cpu_shares(
1510 c->cgroup, config->cpu_cgparams.shares);
1511 if (rc)
1512 goto error_rmdir;
1513 }
1514 if (config->cpu_cgparams.period) {
1515 rc = c->cgroup->ops->set_cpu_quota(
1516 c->cgroup, config->cpu_cgparams.quota);
1517 if (rc)
1518 goto error_rmdir;
1519 rc = c->cgroup->ops->set_cpu_period(
1520 c->cgroup, config->cpu_cgparams.period);
1521 if (rc)
1522 goto error_rmdir;
1523 }
1524 if (config->cpu_cgparams.rt_period) {
1525 rc = c->cgroup->ops->set_cpu_rt_runtime(
1526 c->cgroup, config->cpu_cgparams.rt_runtime);
1527 if (rc)
1528 goto error_rmdir;
1529 rc = c->cgroup->ops->set_cpu_rt_period(
1530 c->cgroup, config->cpu_cgparams.rt_period);
1531 if (rc)
1532 goto error_rmdir;
1533 }
1534
Dylan Reid837c74a2016-01-22 17:25:21 -08001535 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001536 if (config->pid_file_path) {
1537 c->pid_file_path = strdup(config->pid_file_path);
1538 if (!c->pid_file_path) {
1539 rc = -ENOMEM;
1540 goto error_rmdir;
1541 }
1542 } else if (c->runfs) {
1543 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1544 rc = -ENOMEM;
1545 goto error_rmdir;
1546 }
1547 }
1548
1549 if (c->pid_file_path)
1550 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001551 minijail_reset_signal_mask(c->jail);
1552
1553 /* Setup container namespaces. */
1554 minijail_namespace_ipc(c->jail);
1555 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001556 if (!config->share_host_netns)
1557 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001558 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001559 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001560 if (getuid() != 0)
1561 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001562 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001563 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001564 if (rc)
1565 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001566 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001567 if (rc)
1568 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001569
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001570 /* Set the UID/GID inside the container if not 0. */
Stephen Barber1a398c72017-01-23 12:39:44 -08001571 if (get_userns_outside_id(config->uid_map, config->uid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001572 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001573 else if (config->uid > 0)
1574 minijail_change_uid(c->jail, config->uid);
1575 if (get_userns_outside_id(config->gid_map, config->gid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001576 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001577 else if (config->gid > 0)
1578 minijail_change_gid(c->jail, config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001579
Dylan Reid837c74a2016-01-22 17:25:21 -08001580 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1581 if (rc)
1582 goto error_rmdir;
1583
1584 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001585 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1586 if (c->cgroup->cgroup_tasks_paths[i]) {
1587 rc = minijail_add_to_cgroup(c->jail,
1588 c->cgroup->cgroup_tasks_paths[i]);
1589 if (rc)
1590 goto error_rmdir;
1591 }
1592 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001593
Dylan Reide040c6b2016-05-02 18:49:02 -07001594 if (config->alt_syscall_table)
1595 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001596
Dylan Reid93fa4602017-06-06 13:39:31 -07001597 for (i = 0; i < config->num_rlimits; i++) {
1598 const struct container_rlimit *lim = &config->rlimits[i];
1599 rc = minijail_rlimit(c->jail, lim->type, lim->cur,
1600 lim->max);
1601 if (rc)
1602 goto error_rmdir;
1603 }
1604
1605
Dylan Reid837c74a2016-01-22 17:25:21 -08001606 minijail_run_as_init(c->jail);
1607
Dylan Reid3da683b2016-04-05 03:35:35 -07001608 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1609 minijail_skip_remount_private(c->jail);
1610
Dylan Reidc4335842016-11-11 10:24:52 -08001611 if (!config->keep_fds_open)
1612 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001613
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001614 if (config->use_capmask) {
1615 minijail_use_caps(c->jail, config->capmask);
1616 if (config->use_capmask_ambient) {
1617 minijail_set_ambient_caps(c->jail);
1618 }
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001619 if (config->securebits_skip_mask) {
1620 minijail_skip_setting_securebits(c->jail,
1621 config->securebits_skip_mask);
1622 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001623 }
1624
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001625 if (!config->do_init)
1626 minijail_run_as_init(c->jail);
1627
Dylan Reid837c74a2016-01-22 17:25:21 -08001628 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001629 config->program_argv[0],
1630 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001631 &c->init_pid, NULL, NULL,
1632 NULL);
1633 if (rc)
1634 goto error_rmdir;
1635 return 0;
1636
1637error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001638 if (!rc)
1639 rc = -errno;
1640 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001641 return rc;
1642}
1643
1644const char *container_root(struct container *c)
1645{
1646 return c->runfs;
1647}
1648
1649int container_pid(struct container *c)
1650{
1651 return c->init_pid;
1652}
1653
1654static int container_teardown(struct container *c)
1655{
Dylan Reid837c74a2016-01-22 17:25:21 -08001656 int ret = 0;
1657
Dylan Reide040c6b2016-05-02 18:49:02 -07001658 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001659 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001660 if (umount(c->runfsroot))
1661 ret = -errno;
1662 if (rmdir(c->runfsroot))
1663 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001664 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001665 }
1666 if (c->pid_file_path) {
1667 if (unlink(c->pid_file_path))
1668 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001669 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001670 }
1671 if (c->runfs) {
1672 if (rmdir(c->runfs))
1673 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001674 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001675 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001676 return ret;
1677}
1678
1679int container_wait(struct container *c)
1680{
Dylan Reidcf745c52016-04-22 10:18:03 -07001681 int rc;
1682
1683 do {
1684 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001685 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001686
Luis Hector Chavez945af482016-06-03 08:39:34 -07001687 // If the process had already been reaped, still perform teardown.
1688 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001689 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001690 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001691 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001692}
1693
1694int container_kill(struct container *c)
1695{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001696 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001697 return -errno;
1698 return container_wait(c);
1699}