blob: c59fae1b1bf81bdc7adf28f6690be8c167336264 [file] [log] [blame]
Dylan Reid837c74a2016-01-22 17:25:21 -08001/* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#define _GNU_SOURCE /* For asprintf */
7
#include <errno.h>
#include <fcntl.h>
#if USE_device_mapper
#include <libdevmapper.h>
#endif
#include <limits.h>
#include <malloc.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
24
Mike Frysinger412dbd22017-01-06 01:50:34 -050025#include <linux/loop.h>
26
Dylan Reid837c74a2016-01-22 17:25:21 -080027#include "container_cgroup.h"
28#include "libcontainer.h"
29#include "libminijail.h"
30
/*
 * Frees |ptr| and then resets it to NULL so the stale pointer cannot be
 * freed or dereferenced again. |ptr| must be a modifiable lvalue; it is
 * evaluated twice, so it must not have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
36
/* Size of the argv array built for the setfiles command. */
#define MAX_NUM_SETFILES_ARGS 128
/* Capacity of the rlimits array; Linux defines 15 at the time of writing. */
#define MAX_RLIMITS 32

/* Control node opened to ask for a free loop device. */
static const char loopdev_ctl[] = "/dev/loop-control";
#if USE_device_mapper
/* NOTE(review): presumably prepended to device-mapper names; confirm in dm_setup(). */
static const char dm_dev_prefix[] = "/dev/mapper/";
#endif
Mike Frysinger412dbd22017-01-06 01:50:34 -050044
Luis Hector Chavez945af482016-06-03 08:39:34 -070045static int container_teardown(struct container *c);
46
/*
 * Replaces the string owned by |*dest| with a newly allocated copy of |src|.
 *
 * The previous value of |*dest| is released only after the copy has been
 * made, so |*dest| is left untouched when an error is returned.
 *
 * Returns 0 on success, -EINVAL if |dest| or |src| is NULL, or -ENOMEM if the
 * copy cannot be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behavior, so reject it up front. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(*dest);
	*dest = copy;
	return 0;
}
57
/* One filesystem mount requested for the container. */
struct container_mount {
	char *name;		/* Required; owned copy. */
	char *source;		/* Required; owned copy. */
	char *destination;	/* Required; owned copy. */
	char *type;		/* Required; owned copy. */
	char *data;		/* Optional, may be NULL. */
	char *verity;		/* Optional, may be NULL. */
	int flags;
	int uid;		/* Owner used when the destination is created. */
	int gid;		/* Group used when the destination is created. */
	int mode;		/* Mode used when the destination is created. */
	int mount_in_ns;	/* Non-zero: mount inside the new vfs namespace. */
	int create;		/* Non-zero: create the target if it is missing. */
	int loopback;		/* Non-zero: mount the target via a loop device. */
};
73
/* One device node requested for the container. */
struct container_device {
	char type;		/* 'c' for a char device, 'b' for a block device. */
	char *path;		/* Owned copy of the node path. */
	int fs_permissions;
	int major;
	int minor;
	int copy_minor;		/* Take the minor from the existing node; |minor| is ignored. */
	int uid;
	int gid;
};
84
/* One rule for the devices cgroup. */
struct container_cgroup_device {
	int allow;
	char type;
	int major;	/* -1 matches every major number. */
	int minor;	/* -1 matches every minor number. */
	int read;
	int write;
	int modify;
};
94
/* CPU cgroup parameters stored in the container configuration. */
struct container_cpu_cgroup {
	int shares;
	int quota;	/* May be -1, meaning no CPU time restriction. */
	int period;
	int rt_runtime;	/* 0 keeps the cgroup from using realtime CPU. */
	int rt_period;
};
102
/* One resource limit to apply to the contained process. */
struct container_rlimit {
	int type;	/* NOTE(review): presumably an RLIMIT_* resource id; confirm. */
	uint32_t cur;
	uint32_t max;
};
108
Dylan Reid837c74a2016-01-22 17:25:21 -0800109/*
110 * Structure that configures how the container is run.
111 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500112 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -0800113 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700114 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700115 * premounted_runfs - Path to where the container will be run.
116 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -0800117 * program_argv - The program to run and args, e.g. "/sbin/init".
118 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -0700119 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800120 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -0700121 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800122 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
123 * alt_syscall_table - Syscall table to use or NULL if none.
124 * mounts - Filesystems to mount in the new namespace.
125 * num_mounts - Number of above.
126 * devices - Device nodes to create.
127 * num_devices - Number of above.
Dylan Reid4843d6b2017-03-31 18:14:30 -0700128 * cgroup_devices - Device node cgroup permissions.
129 * num_cgroup_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700130 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800131 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700132 * cgroup_parent - Parent dir for cgroup creation
133 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700134 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700135 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800136 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid93fa4602017-06-06 13:39:31 -0700137 * rlimits - Array of rlimits for the contained process.
138 * num_rlimits - The number of elements in `rlimits`.
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700139 * securebits_skip_mask - The mask of securebits to skip when restricting caps.
Dylan Reid837c74a2016-01-22 17:25:21 -0800140 */
141struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500142 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800143 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700144 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700145 char *premounted_runfs;
146 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800147 char **program_argv;
148 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700149 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800150 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700151 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800152 char *gid_map;
153 char *alt_syscall_table;
154 struct container_mount *mounts;
155 size_t num_mounts;
156 struct container_device *devices;
157 size_t num_devices;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700158 struct container_cgroup_device *cgroup_devices;
159 size_t num_cgroup_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700160 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800161 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700162 char *cgroup_parent;
163 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700164 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700165 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800166 int keep_fds_open;
Dylan Reid93fa4602017-06-06 13:39:31 -0700167 struct container_rlimit rlimits[MAX_RLIMITS];
168 int num_rlimits;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700169 int use_capmask;
170 int use_capmask_ambient;
171 uint64_t capmask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700172 uint64_t securebits_skip_mask;
Dylan Reid837c74a2016-01-22 17:25:21 -0800173};
174
175struct container_config *container_config_create()
176{
177 return calloc(1, sizeof(struct container_config));
178}
179
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700180static void container_free_program_args(struct container_config *c)
181{
182 int i;
183
184 if (!c->program_argv)
185 return;
186 for (i = 0; i < c->num_args; ++i) {
187 FREE_AND_NULL(c->program_argv[i]);
188 }
189 FREE_AND_NULL(c->program_argv);
190}
191
192static void container_config_free_mount(struct container_mount *mount)
193{
194 FREE_AND_NULL(mount->name);
195 FREE_AND_NULL(mount->source);
196 FREE_AND_NULL(mount->destination);
197 FREE_AND_NULL(mount->type);
198 FREE_AND_NULL(mount->data);
199}
200
201static void container_config_free_device(struct container_device *device)
202{
203 FREE_AND_NULL(device->path);
204}
205
Dylan Reid837c74a2016-01-22 17:25:21 -0800206void container_config_destroy(struct container_config *c)
207{
208 size_t i;
209
210 if (c == NULL)
211 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700212 FREE_AND_NULL(c->rootfs);
213 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700214 FREE_AND_NULL(c->premounted_runfs);
215 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700216 FREE_AND_NULL(c->uid_map);
217 FREE_AND_NULL(c->gid_map);
218 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800219 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700220 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800221 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700222 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800223 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700224 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800225 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700226 FREE_AND_NULL(c->devices);
Dylan Reida34f8162017-05-10 11:33:11 -0700227 FREE_AND_NULL(c->cgroup_devices);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700228 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700229 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700230 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800231}
232
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500233int container_config_config_root(struct container_config *c,
234 const char *config_root)
235{
236 return strdup_and_free(&c->config_root, config_root);
237}
238
239const char *container_config_get_config_root(const struct container_config *c)
240{
241 return c->config_root;
242}
243
Dylan Reid837c74a2016-01-22 17:25:21 -0800244int container_config_rootfs(struct container_config *c, const char *rootfs)
245{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700246 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800247}
248
Dylan Reid11456722016-05-02 11:24:50 -0700249const char *container_config_get_rootfs(const struct container_config *c)
250{
251 return c->rootfs;
252}
253
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700254void container_config_rootfs_mount_flags(struct container_config *c,
255 unsigned long rootfs_mount_flags)
256{
257 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
258 * simply check against zero later. MS_BIND is also added to avoid
259 * re-mounting the original filesystem, since the rootfs is always
260 * bind-mounted.
261 */
262 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
263}
264
265unsigned long container_config_get_rootfs_mount_flags(
266 const struct container_config *c)
267{
268 return c->rootfs_mount_flags;
269}
270
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700271int container_config_premounted_runfs(struct container_config *c, const char *runfs)
272{
273 return strdup_and_free(&c->premounted_runfs, runfs);
274}
275
276const char *container_config_get_premounted_runfs(const struct container_config *c)
277{
278 return c->premounted_runfs;
279}
280
281int container_config_pid_file(struct container_config *c, const char *path)
282{
283 return strdup_and_free(&c->pid_file_path, path);
284}
285
286const char *container_config_get_pid_file(const struct container_config *c)
287{
288 return c->pid_file_path;
289}
290
Dylan Reid837c74a2016-01-22 17:25:21 -0800291int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800292 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800293{
294 size_t i;
295
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700296 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800297 c->num_args = num_args;
298 c->program_argv = calloc(num_args + 1, sizeof(char *));
299 if (!c->program_argv)
300 return -ENOMEM;
301 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700302 if (strdup_and_free(&c->program_argv[i], argv[i]))
303 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800304 }
305 c->program_argv[num_args] = NULL;
306 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700307
308error_free_return:
309 container_free_program_args(c);
310 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800311}
312
Dylan Reid11456722016-05-02 11:24:50 -0700313size_t container_config_get_num_program_args(const struct container_config *c)
314{
315 return c->num_args;
316}
317
318const char *container_config_get_program_arg(const struct container_config *c,
319 size_t index)
320{
321 if (index >= c->num_args)
322 return NULL;
323 return c->program_argv[index];
324}
325
Dylan Reid1874feb2016-06-22 17:53:50 -0700326void container_config_uid(struct container_config *c, uid_t uid)
327{
328 c->uid = uid;
329}
330
331uid_t container_config_get_uid(const struct container_config *c)
332{
333 return c->uid;
334}
335
Dylan Reid837c74a2016-01-22 17:25:21 -0800336int container_config_uid_map(struct container_config *c, const char *uid_map)
337{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700338 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800339}
340
Dylan Reid1874feb2016-06-22 17:53:50 -0700341void container_config_gid(struct container_config *c, gid_t gid)
342{
343 c->gid = gid;
344}
345
346gid_t container_config_get_gid(const struct container_config *c)
347{
348 return c->gid;
349}
350
Dylan Reid837c74a2016-01-22 17:25:21 -0800351int container_config_gid_map(struct container_config *c, const char *gid_map)
352{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700353 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800354}
355
356int container_config_alt_syscall_table(struct container_config *c,
357 const char *alt_syscall_table)
358{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700359 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800360}
361
Dylan Reid93fa4602017-06-06 13:39:31 -0700362int container_config_add_rlimit(struct container_config *c, int type,
363 uint32_t cur, uint32_t max)
364{
365 if (c->num_rlimits >= MAX_RLIMITS) {
366 return -ENOMEM;
367 }
368 c->rlimits[c->num_rlimits].type = type;
369 c->rlimits[c->num_rlimits].cur = cur;
370 c->rlimits[c->num_rlimits].max = max;
371 c->num_rlimits++;
372 return 0;
373}
374
Dylan Reid837c74a2016-01-22 17:25:21 -0800375int container_config_add_mount(struct container_config *c,
376 const char *name,
377 const char *source,
378 const char *destination,
379 const char *type,
380 const char *data,
Mike Frysinger05e594e2017-01-10 02:11:08 -0500381 const char *verity,
Dylan Reid837c74a2016-01-22 17:25:21 -0800382 int flags,
383 int uid,
384 int gid,
385 int mode,
386 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500387 int create,
388 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800389{
390 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700391 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800392
393 if (name == NULL || source == NULL ||
394 destination == NULL || type == NULL)
395 return -EINVAL;
396
397 mount_ptr = realloc(c->mounts,
398 sizeof(c->mounts[0]) * (c->num_mounts + 1));
399 if (!mount_ptr)
400 return -ENOMEM;
401 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700402 current_mount = &c->mounts[c->num_mounts];
403 memset(current_mount, 0, sizeof(struct container_mount));
404
405 if (strdup_and_free(&current_mount->name, name))
406 goto error_free_return;
407 if (strdup_and_free(&current_mount->source, source))
408 goto error_free_return;
409 if (strdup_and_free(&current_mount->destination, destination))
410 goto error_free_return;
411 if (strdup_and_free(&current_mount->type, type))
412 goto error_free_return;
413 if (data && strdup_and_free(&current_mount->data, data))
414 goto error_free_return;
Mike Frysinger05e594e2017-01-10 02:11:08 -0500415 if (verity && strdup_and_free(&current_mount->verity, verity))
416 goto error_free_return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700417 current_mount->flags = flags;
418 current_mount->uid = uid;
419 current_mount->gid = gid;
420 current_mount->mode = mode;
421 current_mount->mount_in_ns = mount_in_ns;
422 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500423 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800424 ++c->num_mounts;
425 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700426
427error_free_return:
428 container_config_free_mount(current_mount);
429 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800430}
431
Dylan Reid4843d6b2017-03-31 18:14:30 -0700432int container_config_add_cgroup_device(struct container_config *c,
433 int allow,
434 char type,
435 int major,
436 int minor,
437 int read,
438 int write,
439 int modify)
440{
441 struct container_cgroup_device *dev_ptr;
442 struct container_cgroup_device *current_dev;
443
444 dev_ptr = realloc(c->cgroup_devices,
445 sizeof(c->cgroup_devices[0]) *
446 (c->num_cgroup_devices + 1));
447 if (!dev_ptr)
448 return -ENOMEM;
449 c->cgroup_devices = dev_ptr;
450
451 current_dev = &c->cgroup_devices[c->num_cgroup_devices];
452 memset(current_dev, 0, sizeof(struct container_cgroup_device));
453 current_dev->allow = allow;
454 current_dev->type = type;
455 current_dev->major = major;
456 current_dev->minor = minor;
457 current_dev->read = read;
458 current_dev->write = write;
459 current_dev->modify = modify;
460 ++c->num_cgroup_devices;
461
462 return 0;
463}
464
Dylan Reid837c74a2016-01-22 17:25:21 -0800465int container_config_add_device(struct container_config *c,
466 char type,
467 const char *path,
468 int fs_permissions,
469 int major,
470 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700471 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800472 int uid,
473 int gid,
474 int read_allowed,
475 int write_allowed,
476 int modify_allowed)
477{
478 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700479 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800480
481 if (path == NULL)
482 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700483 /* If using a dynamic minor number, ensure that minor is -1. */
484 if (copy_minor && (minor != -1))
485 return -EINVAL;
486
Dylan Reid837c74a2016-01-22 17:25:21 -0800487 dev_ptr = realloc(c->devices,
488 sizeof(c->devices[0]) * (c->num_devices + 1));
489 if (!dev_ptr)
490 return -ENOMEM;
491 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700492 current_dev = &c->devices[c->num_devices];
493 memset(current_dev, 0, sizeof(struct container_device));
494
495 current_dev->type = type;
496 if (strdup_and_free(&current_dev->path, path))
497 goto error_free_return;
498 current_dev->fs_permissions = fs_permissions;
499 current_dev->major = major;
500 current_dev->minor = minor;
501 current_dev->copy_minor = copy_minor;
502 current_dev->uid = uid;
503 current_dev->gid = gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700504 if (read_allowed || write_allowed || modify_allowed) {
505 if (container_config_add_cgroup_device(c,
506 1,
507 type,
508 major,
509 minor,
510 read_allowed,
511 write_allowed,
512 modify_allowed))
513 goto error_free_return;
514 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800515 ++c->num_devices;
516 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700517
518error_free_return:
519 container_config_free_device(current_dev);
520 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800521}
522
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700523int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700524 const char *setfiles_cmd)
525{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700526 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700527}
Dylan Reid837c74a2016-01-22 17:25:21 -0800528
Dylan Reid11456722016-05-02 11:24:50 -0700529const char *container_config_get_run_setfiles(const struct container_config *c)
530{
531 return c->run_setfiles;
532}
533
Chinyue Chenfac909e2016-06-24 14:17:42 +0800534int container_config_set_cpu_shares(struct container_config *c, int shares)
535{
536 /* CPU shares must be 2 or higher. */
537 if (shares < 2)
538 return -EINVAL;
539
540 c->cpu_cgparams.shares = shares;
541 return 0;
542}
543
544int container_config_set_cpu_cfs_params(struct container_config *c,
545 int quota,
546 int period)
547{
548 /*
549 * quota could be set higher than period to utilize more than one CPU.
550 * quota could also be set as -1 to indicate the cgroup does not adhere
551 * to any CPU time restrictions.
552 */
553 if (quota <= 0 && quota != -1)
554 return -EINVAL;
555 if (period <= 0)
556 return -EINVAL;
557
558 c->cpu_cgparams.quota = quota;
559 c->cpu_cgparams.period = period;
560 return 0;
561}
562
563int container_config_set_cpu_rt_params(struct container_config *c,
564 int rt_runtime,
565 int rt_period)
566{
567 /*
568 * rt_runtime could be set as 0 to prevent the cgroup from using
569 * realtime CPU.
570 */
571 if (rt_runtime < 0 || rt_runtime >= rt_period)
572 return -EINVAL;
573
574 c->cpu_cgparams.rt_runtime = rt_runtime;
575 c->cpu_cgparams.rt_period = rt_period;
576 return 0;
577}
578
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800579int container_config_get_cpu_shares(struct container_config *c)
580{
581 return c->cpu_cgparams.shares;
582}
583
584int container_config_get_cpu_quota(struct container_config *c)
585{
586 return c->cpu_cgparams.quota;
587}
588
589int container_config_get_cpu_period(struct container_config *c)
590{
591 return c->cpu_cgparams.period;
592}
593
594int container_config_get_cpu_rt_runtime(struct container_config *c)
595{
596 return c->cpu_cgparams.rt_runtime;
597}
598
599int container_config_get_cpu_rt_period(struct container_config *c)
600{
601 return c->cpu_cgparams.rt_period;
602}
603
Dylan Reid9e724af2016-07-21 09:58:07 -0700604int container_config_set_cgroup_parent(struct container_config *c,
605 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700606 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700607{
608 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700609 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700610 return strdup_and_free(&c->cgroup_parent, parent);
611}
612
613const char *container_config_get_cgroup_parent(struct container_config *c)
614{
615 return c->cgroup_parent;
616}
617
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700618void container_config_share_host_netns(struct container_config *c)
619{
620 c->share_host_netns = 1;
621}
622
623int get_container_config_share_host_netns(struct container_config *c)
624{
625 return c->share_host_netns;
626}
627
Dylan Reidc4335842016-11-11 10:24:52 -0800628void container_config_keep_fds_open(struct container_config *c)
629{
630 c->keep_fds_open = 1;
631}
632
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700633void container_config_set_capmask(struct container_config *c,
634 uint64_t capmask,
635 int ambient)
636{
637 c->use_capmask = 1;
638 c->capmask = capmask;
639 c->use_capmask_ambient = ambient;
640}
641
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700642void container_config_set_securebits_skip_mask(struct container_config *c,
643 uint64_t securebits_skip_mask)
644{
645 c->securebits_skip_mask = securebits_skip_mask;
646}
647
/*
 * Container manipulation
 */

/* Runtime state of one container instance. */
struct container {
	struct container_cgroup *cgroup;	/* Released in container_destroy(). */
	struct minijail *jail;			/* Released in container_destroy(). */
	pid_t init_pid;
	char *config_root;
	char *runfs;
	char *rundir;		/* Copy of the rundir given to container_new(). */
	char *runfsroot;
	char *pid_file_path;
	char **ext_mounts;	/* Mounts made outside of the minijail */
	size_t num_ext_mounts;
	char **loopdevs;
	size_t num_loopdevs;
	char **device_mappers;
	size_t num_device_mappers;
	char *name;		/* Copy of the name given to container_new(). */
};
668
669struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700670 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800671{
672 struct container *c;
673
Dylan Reid837c74a2016-01-22 17:25:21 -0800674 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700675 if (!c)
676 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800677 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700678 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700679 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700680 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800681 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700682 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800683 return c;
684}
685
686void container_destroy(struct container *c)
687{
Dylan Reid684975e2016-05-02 15:44:47 -0700688 if (c->cgroup)
689 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700690 if (c->jail)
691 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500692 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700693 FREE_AND_NULL(c->name);
694 FREE_AND_NULL(c->rundir);
695 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800696}
697
/*
 * Translates an id inside of the user namespace to the equivalent id outside
 * of it.
 *
 * |map| is a uid/gid map of the form "inside1 outside1 length1, inside2
 * outside2 length2, ...". Each entry maps the |length| ids starting at
 * |inside|, i.e. [inside, inside + length), onto the ids starting at
 * |outside|.
 *
 * A NULL or empty |map| is treated as "no mapping configured" and |id| is
 * returned unchanged.
 *
 * Returns the outside id on success, or a negative errno value: -EINVAL when
 * the map is malformed, holds a negative or out-of-range number, or does not
 * cover |id|; or the error reported by strtol() (e.g. -ERANGE).
 */
static int get_userns_outside_id(const char *map, int id)
{
	const char *cursor = map;

	if (!map || *map == '\0')
		return id;

	for (;;) {
		long long fields[3];	/* inside, outside, length */
		size_t i;

		/* Skip the separators before the next entry. */
		while (*cursor == ',' || *cursor == ' ' || *cursor == '\t')
			++cursor;
		if (*cursor == '\0')
			break;

		for (i = 0; i < 3; ++i) {
			char *end;
			long value;

			errno = 0;
			value = strtol(cursor, &end, 10);
			if (end == cursor)
				return -EINVAL;	/* Missing or non-numeric. */
			if (errno)
				return -errno;
			if (value < 0 || value > INT_MAX)
				return -EINVAL;
			fields[i] = value;
			cursor = end;
		}

		/* Only blanks may follow the length inside an entry. */
		while (*cursor == ' ' || *cursor == '\t')
			++cursor;
		if (*cursor != ',' && *cursor != '\0')
			return -EINVAL;

		/*
		 * The range is half-open: inside + length is the first id
		 * that is NOT mapped. Computing in long long avoids overflow.
		 */
		if (id >= fields[0] && id - fields[0] < fields[2]) {
			long long outside_id = (id - fields[0]) + fields[1];

			if (outside_id > INT_MAX)
				return -EINVAL;
			return (int)outside_id;
		}
	}

	/* No entry covers |id|. */
	return -EINVAL;
}
740
/*
 * Creates the directory |path|, then applies |mode| and the |uid|/|gid|
 * ownership to it. The explicit chmod() applies |mode| regardless of the
 * umask in effect during mkdir().
 *
 * Returns 0 on success or -errno from the first call that fails.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* Short-circuit evaluation keeps errno from the failing call. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
751
/*
 * Opens |path| read/write, creating it with |mode| if it does not exist, and
 * sets its ownership to |uid|/|gid|.
 *
 * Returns 0 on success or -errno from the open() or fchown() that failed.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int saved_errno = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;

	/* Capture errno now: close() below is allowed to overwrite it. */
	if (fchown(fd, uid, gid))
		saved_errno = errno;
	close(fd);

	return -saved_errno;
}
765
766/* Make sure the mount target exists in the new rootfs. Create if needed and
767 * possible.
768 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800769static int setup_mount_destination(const struct container_config *config,
770 const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700771 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800772 const char *dest)
773{
Stephen Barber1a398c72017-01-23 12:39:44 -0800774 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -0800775 int rc;
776 struct stat st_buf;
777
778 rc = stat(dest, &st_buf);
779 if (rc == 0) /* destination exists */
780 return 0;
781
782 /* Try to create the destination. Either make directory or touch a file
783 * depending on the source type.
784 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800785 uid_userns = get_userns_outside_id(config->uid_map, mnt->uid);
786 if (uid_userns < 0)
787 return uid_userns;
788 gid_userns = get_userns_outside_id(config->gid_map, mnt->gid);
789 if (gid_userns < 0)
790 return gid_userns;
791
Dylan Reid2149be92016-04-28 18:38:57 -0700792 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800793 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
Stephen Barber1a398c72017-01-23 12:39:44 -0800794 return make_dir(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800795
Stephen Barber1a398c72017-01-23 12:39:44 -0800796 return touch_file(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800797}
798
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700799/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700800static int run_setfiles_command(const struct container *c,
801 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800802 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700803{
804 int rc;
805 int status;
806 int pid;
807 char *context_path;
808
Dylan Reide040c6b2016-05-02 18:49:02 -0700809 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700810 return 0;
811
812 if (asprintf(&context_path, "%s/file_contexts",
813 c->runfsroot) < 0)
814 return -errno;
815
816 pid = fork();
817 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800818 size_t i;
819 size_t arg_index = 0;
820 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700821 const char *env[] = {
822 NULL,
823 };
824
Yusuke Sato91f11f02016-12-02 16:15:13 -0800825 argv[arg_index++] = config->run_setfiles;
826 argv[arg_index++] = "-r";
827 argv[arg_index++] = c->runfsroot;
828 argv[arg_index++] = context_path;
829 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
830 _exit(-E2BIG);
831 for (i = 0; i < num_destinations; ++i) {
832 argv[arg_index++] = destinations[i];
833 }
834 argv[arg_index] = NULL;
835
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700836 execve(argv[0], (char *const*)argv, (char *const*)env);
837
838 /* Command failed to exec if execve returns. */
839 _exit(-errno);
840 }
841 free(context_path);
842 if (pid < 0)
843 return -errno;
844 do {
845 rc = waitpid(pid, &status, 0);
846 } while (rc == -1 && errno == EINTR);
847 if (rc < 0)
848 return -errno;
849 return status;
850}
851
Mike Frysinger412dbd22017-01-06 01:50:34 -0500852/* Find a free loop device and attach it. */
853static int loopdev_setup(char **loopdev_ret, const char *source)
854{
855 int ret = 0;
856 int source_fd = -1;
857 int control_fd = -1;
858 int loop_fd = -1;
859 char *loopdev = NULL;
860
861 source_fd = open(source, O_RDONLY|O_CLOEXEC);
862 if (source_fd < 0)
863 goto error;
864
865 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
866 if (control_fd < 0)
867 goto error;
868
869 while (1) {
870 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
871 if (num < 0)
872 goto error;
873
874 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
875 goto error;
876
877 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
878 if (loop_fd < 0)
879 goto error;
880
881 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
882 break;
883
884 if (errno != EBUSY)
885 goto error;
886
887 /* Clean up resources for the next pass. */
888 free(loopdev);
889 close(loop_fd);
890 }
891
892 *loopdev_ret = loopdev;
893 goto exit;
894
895error:
896 ret = -errno;
897 free(loopdev);
898exit:
899 if (source_fd != -1)
900 close(source_fd);
901 if (control_fd != -1)
902 close(control_fd);
903 if (loop_fd != -1)
904 close(loop_fd);
905 return ret;
906}
907
908/* Detach the specified loop device. */
909static int loopdev_detach(const char *loopdev)
910{
911 int ret = 0;
912 int fd;
913
914 fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
915 if (fd < 0)
916 goto error;
917 if (ioctl(fd, LOOP_CLR_FD) < 0)
918 goto error;
919
920 goto exit;
921
922error:
923 ret = -errno;
924exit:
925 if (fd != -1)
926 close(fd);
927 return ret;
928}
929
/*
 * Create a new device mapper target for the source.
 *
 * |verity_cmdline| is a device-mapper table line ("<start> <size> <type>
 * <args...>") in which every "@DEV@" token is replaced by |source|.
 *
 * On success returns 0 and hands ownership of two malloc()ed strings to the
 * caller: |*dm_path_ret| (the /dev/mapper/ node) and |*dm_name_ret| (the
 * device mapper name, derived from |source| with '/' replaced by '_').
 * On failure returns a negative errno value.
 *
 * When built without USE_device_mapper no device can be created, so both
 * outputs are set to NULL and -ENOTSUP is returned instead of claiming
 * success.
 */
static int dm_setup(char **dm_path_ret, char **dm_name_ret, const char *source,
		    const char *verity_cmdline)
{
#if USE_device_mapper
	static const char dev_token[] = "@DEV@";
	const size_t token_len = sizeof(dev_token) - 1;
	int ret = 0;
	char *p;
	char *dm_path = NULL;
	char *dm_name = NULL;
	char *verity = NULL;
	char *out;
	const char *in;
	const char *hit;
	size_t source_len;
	size_t num_tokens = 0;
	struct dm_task *dmt = NULL;
	uint32_t cookie = 0;
	char ttype[20];
	unsigned long long start, size;
	int n = 0;

	/* Normalize the name into something unique-esque. */
	if (asprintf(&dm_name, "cros-containers-%s", source) < 0) {
		dm_name = NULL;
		goto error;
	}
	p = dm_name;
	while ((p = strchr(p, '/')) != NULL)
		*p++ = '_';

	/* Get the /dev path for the higher levels to mount. */
	if (asprintf(&dm_path, "%s%s", dm_dev_prefix, dm_name) < 0) {
		dm_path = NULL;
		goto error;
	}

	/*
	 * Insert the source path in the verity command line.  Count the
	 * tokens first so the buffer is large enough for any number of
	 * them, then copy left to right so substituted text is never
	 * rescanned.
	 */
	source_len = strlen(source);
	for (in = verity_cmdline; (in = strstr(in, dev_token)) != NULL;
	     in += token_len)
		num_tokens++;
	verity = malloc(strlen(verity_cmdline) + num_tokens * source_len + 1);
	if (verity == NULL) {
		errno = ENOMEM;
		goto error;
	}
	in = verity_cmdline;
	out = verity;
	while ((hit = strstr(in, dev_token)) != NULL) {
		size_t lead = (size_t)(hit - in);

		memcpy(out, in, lead);
		out += lead;
		memcpy(out, source, source_len);
		out += source_len;
		in = hit + token_len;
	}
	strcpy(out, in);

	/* Extract the first three parameters for dm-verity settings. */
	if (sscanf(verity, "%llu %llu %10s %n", &start, &size, ttype, &n) != 3) {
		errno = EINVAL;
		goto error;
	}

	/* Finally create the device mapper. */
	dmt = dm_task_create(DM_DEVICE_CREATE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_set_ro(dmt))
		goto error;

	if (!dm_task_add_target(dmt, start, size, ttype, verity + n))
		goto error;

	if (!dm_task_set_cookie(dmt, &cookie, 0))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	/* Make sure the node exists before we continue. */
	dm_udev_wait(cookie);

	*dm_path_ret = dm_path;
	*dm_name_ret = dm_name;
	goto exit;

error:
	/* Never report a failure as success, even if errno was not set. */
	ret = errno ? -errno : -EIO;
	free(dm_name);
	free(dm_path);
exit:
	free(verity);
	if (dmt)
		dm_task_destroy(dmt);
	return ret;
#else
	(void)source;
	(void)verity_cmdline;
	*dm_path_ret = NULL;
	*dm_name_ret = NULL;
	return -ENOTSUP;
#endif
}
1008
/*
 * Tear down the device mapper target.
 *
 * Issues a DM_DEVICE_REMOVE task for |dm_name|.  Returns 0 on success or a
 * negative errno value on failure.  When built without USE_device_mapper
 * there is nothing to remove and 0 is returned.
 */
static int dm_detach(const char *dm_name)
{
	int ret = 0;
#if USE_device_mapper
	struct dm_task *dmt;

	dmt = dm_task_create(DM_DEVICE_REMOVE);
	/* Bail out directly: there is no task to destroy in this case. */
	if (dmt == NULL)
		return errno ? -errno : -ENOMEM;

	if (!dm_task_set_name(dmt, dm_name) || !dm_task_run(dmt))
		ret = errno ? -errno : -EIO;

	dm_task_destroy(dmt);
#else
	(void)dm_name;
#endif
	return ret;
}
1035
Dylan Reide040c6b2016-05-02 18:49:02 -07001036/*
1037 * Unmounts anything we mounted in this mount namespace in the opposite order
1038 * that they were mounted.
1039 */
1040static int unmount_external_mounts(struct container *c)
1041{
1042 int ret = 0;
1043
1044 while (c->num_ext_mounts) {
1045 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001046 if (!c->ext_mounts[c->num_ext_mounts])
1047 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -07001048 if (umount(c->ext_mounts[c->num_ext_mounts]))
1049 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001050 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -07001051 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001052 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001053
1054 while (c->num_loopdevs) {
1055 c->num_loopdevs--;
1056 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
1057 ret = -errno;
1058 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
1059 }
1060 FREE_AND_NULL(c->loopdevs);
1061
Mike Frysinger05e594e2017-01-10 02:11:08 -05001062 while (c->num_device_mappers) {
1063 c->num_device_mappers--;
1064 if (dm_detach(c->device_mappers[c->num_device_mappers]))
1065 ret = -errno;
1066 FREE_AND_NULL(c->device_mappers[c->num_device_mappers]);
1067 }
1068 FREE_AND_NULL(c->device_mappers);
1069
Dylan Reide040c6b2016-05-02 18:49:02 -07001070 return ret;
1071}
1072
Junichi Uekawa5d272772016-07-21 16:07:19 +09001073/*
1074 * Match mount_one in minijail, mount one mountpoint with
1075 * consideration for combination of MS_BIND/MS_RDONLY flag.
1076 */
1077static int mount_external(const char *src, const char *dest, const char *type,
1078 unsigned long flags, const void *data)
1079{
1080 int remount_ro = 0;
1081
1082 /*
1083 * R/O bind mounts have to be remounted since 'bind' and 'ro'
1084 * can't both be specified in the original bind mount.
1085 * Remount R/O after the initial mount.
1086 */
1087 if ((flags & MS_BIND) && (flags & MS_RDONLY)) {
1088 remount_ro = 1;
1089 flags &= ~MS_RDONLY;
1090 }
1091
1092 if (mount(src, dest, type, flags, data) == -1)
1093 return -1;
1094
1095 if (remount_ro) {
1096 flags |= MS_RDONLY;
1097 if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1)
1098 return -1;
1099 }
1100
1101 return 0;
1102}
1103
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001104static int do_container_mount(struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001105 const struct container_config *config,
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001106 const struct container_mount *mnt)
1107{
Mike Frysinger05e594e2017-01-10 02:11:08 -05001108 char *dm_source = NULL;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001109 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001110 char *source = NULL;
1111 char *dest = NULL;
1112 int rc = 0;
1113
1114 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
1115 return -errno;
1116
1117 /*
1118 * If it's a bind mount relative to rootfs, append source to
1119 * rootfs path, otherwise source path is absolute.
1120 */
1121 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
1122 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
1123 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001124 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
1125 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
1126 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001127 } else {
1128 if (asprintf(&source, "%s", mnt->source) < 0)
1129 goto error_free_return;
1130 }
1131
Dylan Reidbd5234c2017-06-06 21:20:07 -07001132 // Only create the destinations for external mounts, minijail will take
1133 // care of those mounted in the new namespace.
1134 if (mnt->create && !mnt->mount_in_ns) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001135 rc = setup_mount_destination(config, mnt, source, dest);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001136 if (rc)
1137 goto error_free_return;
1138 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001139 if (mnt->loopback) {
1140 /* Record this loopback file for cleanup later. */
1141 loop_source = source;
1142 source = NULL;
1143 rc = loopdev_setup(&source, loop_source);
1144 if (rc)
1145 goto error_free_return;
1146
Mike Frysinger05e594e2017-01-10 02:11:08 -05001147 /* Save this to cleanup when shutting down. */
Mike Frysinger412dbd22017-01-06 01:50:34 -05001148 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
1149 if (rc)
1150 goto error_free_return;
1151 c->num_loopdevs++;
1152 }
Mike Frysinger05e594e2017-01-10 02:11:08 -05001153 if (mnt->verity) {
1154 /* Set this device up via dm-verity. */
1155 char *dm_name;
1156 dm_source = source;
1157 source = NULL;
1158 rc = dm_setup(&source, &dm_name, dm_source, mnt->verity);
1159 if (rc)
1160 goto error_free_return;
1161
1162 /* Save this to cleanup when shutting down. */
1163 rc = strdup_and_free(&c->device_mappers[c->num_device_mappers],
1164 dm_name);
1165 free(dm_name);
1166 if (rc)
1167 goto error_free_return;
1168 c->num_device_mappers++;
1169 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001170 if (mnt->mount_in_ns) {
1171 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -07001172 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
1173 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001174 if (rc)
1175 goto error_free_return;
1176 } else {
1177 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +09001178 if (mount_external(source, dest, mnt->type, mnt->flags,
1179 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001180 goto error_free_return;
1181 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001182 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
1183 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001184 goto error_free_return;
1185 c->num_ext_mounts++;
1186 }
1187
1188 goto exit;
1189
1190error_free_return:
1191 if (!rc)
1192 rc = -errno;
1193exit:
Mike Frysinger05e594e2017-01-10 02:11:08 -05001194 free(dm_source);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001195 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001196 free(source);
1197 free(dest);
1198 return rc;
1199}
1200
Dylan Reide040c6b2016-05-02 18:49:02 -07001201static int do_container_mounts(struct container *c,
1202 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -07001203{
1204 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001205 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -07001206
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001207 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -07001208 /*
1209 * Allocate space to track anything we mount in our mount namespace.
1210 * This over-allocates as it has space for all mounts.
1211 */
1212 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
1213 if (!c->ext_mounts)
1214 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001215 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
1216 if (!c->loopdevs)
1217 return -errno;
Mike Frysinger05e594e2017-01-10 02:11:08 -05001218 c->device_mappers = calloc(config->num_mounts, sizeof(*c->device_mappers));
1219 if (!c->device_mappers)
1220 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -07001221
1222 for (i = 0; i < config->num_mounts; ++i) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001223 rc = do_container_mount(c, config, &config->mounts[i]);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001224 if (rc)
1225 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -07001226 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001227
Dylan Reid7daf9982016-04-28 16:55:42 -07001228 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -07001229
1230error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -07001231 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001232 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -07001233}
1234
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001235static int container_create_device(const struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001236 const struct container_config *config,
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001237 const struct container_device *dev,
1238 int minor)
1239{
1240 char *path = NULL;
1241 int rc = 0;
1242 int mode;
Stephen Barber1a398c72017-01-23 12:39:44 -08001243 int uid_userns, gid_userns;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001244
1245 switch (dev->type) {
1246 case 'b':
1247 mode = S_IFBLK;
1248 break;
1249 case 'c':
1250 mode = S_IFCHR;
1251 break;
1252 default:
1253 return -EINVAL;
1254 }
1255 mode |= dev->fs_permissions;
1256
Stephen Barber1a398c72017-01-23 12:39:44 -08001257 uid_userns = get_userns_outside_id(config->uid_map, dev->uid);
1258 if (uid_userns < 0)
1259 return uid_userns;
1260 gid_userns = get_userns_outside_id(config->gid_map, dev->gid);
1261 if (gid_userns < 0)
1262 return gid_userns;
1263
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001264 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
1265 goto error_free_return;
1266 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
1267 goto error_free_return;
Stephen Barber1a398c72017-01-23 12:39:44 -08001268 if (chown(path, uid_userns, gid_userns))
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001269 goto error_free_return;
1270 if (chmod(path, dev->fs_permissions))
1271 goto error_free_return;
1272
1273 goto exit;
1274
1275error_free_return:
1276 rc = -errno;
1277exit:
1278 free(path);
1279 return rc;
1280}
1281
Stephen Barber1a398c72017-01-23 12:39:44 -08001282
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001283static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -08001284{
Dylan Reidb3621832016-03-24 10:24:57 -07001285 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -07001286 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001287 char *runfs_template = NULL;
Stephen Barber1a398c72017-01-23 12:39:44 -08001288 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -08001289
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001290 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
1291 return -ENOMEM;
1292
1293 c->runfs = mkdtemp(runfs_template);
1294 if (!c->runfs) {
1295 free(runfs_template);
1296 return -errno;
1297 }
1298
Stephen Barber1a398c72017-01-23 12:39:44 -08001299 uid_userns = get_userns_outside_id(config->uid_map, config->uid);
1300 if (uid_userns < 0)
1301 return uid_userns;
1302 gid_userns = get_userns_outside_id(config->gid_map, config->gid);
1303 if (gid_userns < 0)
1304 return gid_userns;
1305
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001306 /* Make sure the container uid can access the rootfs. */
1307 if (chmod(c->runfs, 0700))
1308 return -errno;
Stephen Barber1a398c72017-01-23 12:39:44 -08001309 if (chown(c->runfs, uid_userns, gid_userns))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001310 return -errno;
1311
1312 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
1313 return -errno;
1314
1315 if (mkdir(c->runfsroot, root_dir_mode))
1316 return -errno;
1317 if (chmod(c->runfsroot, root_dir_mode))
1318 return -errno;
1319
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001320 if (mount(rootfs, c->runfsroot, "", MS_BIND, NULL))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001321 return -errno;
1322
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001323 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1324 * second call to mount() to actually set them.
1325 */
1326 if (config->rootfs_mount_flags &&
1327 mount(rootfs, c->runfsroot, "",
1328 config->rootfs_mount_flags, NULL)) {
1329 return -errno;
1330 }
1331
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001332 return 0;
1333}
1334
Dylan Reidacedff92017-03-31 17:41:40 -07001335static int device_setup(struct container *c,
1336 const struct container_config *config)
1337{
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001338 int rc;
1339 size_t i;
Dylan Reidacedff92017-03-31 17:41:40 -07001340
1341 c->cgroup->ops->deny_all_devices(c->cgroup);
1342
Dylan Reid4843d6b2017-03-31 18:14:30 -07001343 for (i = 0; i < config->num_cgroup_devices; i++) {
1344 const struct container_cgroup_device *dev =
1345 &config->cgroup_devices[i];
1346 rc = c->cgroup->ops->add_device(c->cgroup,
1347 dev->allow,
1348 dev->major,
1349 dev->minor,
1350 dev->read,
1351 dev->write,
1352 dev->modify,
1353 dev->type);
1354 if (rc)
1355 return rc;
1356 }
1357
Dylan Reidacedff92017-03-31 17:41:40 -07001358 for (i = 0; i < config->num_devices; i++) {
1359 const struct container_device *dev = &config->devices[i];
1360 int minor = dev->minor;
1361
1362 if (dev->copy_minor) {
1363 struct stat st_buff;
1364 if (stat(dev->path, &st_buff) < 0)
1365 continue;
1366 minor = minor(st_buff.st_rdev);
1367 }
1368 if (minor >= 0) {
1369 rc = container_create_device(c, config, dev, minor);
1370 if (rc)
1371 return rc;
1372 }
Dylan Reidacedff92017-03-31 17:41:40 -07001373 }
1374
1375 for (i = 0; i < c->num_loopdevs; ++i) {
1376 struct stat st;
1377
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001378 rc = stat(c->loopdevs[i], &st);
1379 if (rc < 0)
1380 return -errno;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001381 rc = c->cgroup->ops->add_device(c->cgroup, 1, major(st.st_rdev),
Dylan Reidacedff92017-03-31 17:41:40 -07001382 minor(st.st_rdev),
1383 1, 0, 0, 'b');
1384 if (rc)
1385 return rc;
1386 }
1387
1388 return 0;
1389}
1390
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001391int container_start(struct container *c, const struct container_config *config)
1392{
1393 int rc = 0;
1394 unsigned int i;
Stephen Barber1a398c72017-01-23 12:39:44 -08001395 int cgroup_uid, cgroup_gid;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001396 char **destinations;
1397 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001398
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001399 if (!c)
1400 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001401 if (!config)
1402 return -EINVAL;
1403 if (!config->program_argv || !config->program_argv[0])
1404 return -EINVAL;
1405
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001406 if (config->config_root) {
1407 c->config_root = strdup(config->config_root);
1408 if (!c->config_root) {
1409 rc = -ENOMEM;
1410 goto error_rmdir;
1411 }
1412 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001413 if (config->premounted_runfs) {
1414 c->runfs = NULL;
1415 c->runfsroot = strdup(config->premounted_runfs);
1416 if (!c->runfsroot) {
1417 rc = -ENOMEM;
1418 goto error_rmdir;
1419 }
1420 } else {
1421 rc = mount_runfs(c, config);
1422 if (rc)
1423 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001424 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001425
1426 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001427 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001428 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001429
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001430 rc = do_container_mounts(c, config);
1431 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001432 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001433
Stephen Barber1a398c72017-01-23 12:39:44 -08001434 cgroup_uid = get_userns_outside_id(config->uid_map,
1435 config->cgroup_owner);
1436 if (cgroup_uid < 0) {
1437 rc = cgroup_uid;
1438 goto error_rmdir;
1439 }
1440 cgroup_gid = get_userns_outside_id(config->gid_map,
1441 config->cgroup_group);
1442 if (cgroup_gid < 0) {
1443 rc = cgroup_gid;
1444 goto error_rmdir;
1445 }
1446
Dylan Reida9966422016-07-21 10:11:34 -07001447 c->cgroup = container_cgroup_new(c->name,
1448 "/sys/fs/cgroup",
1449 config->cgroup_parent,
Stephen Barber1a398c72017-01-23 12:39:44 -08001450 cgroup_uid,
1451 cgroup_gid);
Dylan Reida9966422016-07-21 10:11:34 -07001452 if (!c->cgroup)
1453 goto error_rmdir;
1454
Keshav Santhanam268fa032016-07-14 09:59:24 -07001455 /* Must be root to modify device cgroup or mknod */
1456 if (getuid() == 0) {
Dylan Reidacedff92017-03-31 17:41:40 -07001457 if (device_setup(c, config))
1458 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001459 }
1460
Dylan Reidd7229582016-04-27 17:08:40 -07001461 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001462 destinations = calloc(config->num_mounts, sizeof(char *));
1463 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001464 for (i = 0; i < config->num_mounts; i++) {
1465 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001466 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001467
1468 if (mnt->mount_in_ns)
1469 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001470 if (mnt->flags & MS_RDONLY)
1471 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001472
Yusuke Satod33db432016-12-05 16:24:37 -08001473 /* A hack to avoid setfiles on /data and /cache. */
1474 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001475 continue;
1476
1477 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1478 size_t j;
1479 for (j = 0; j < num_destinations; ++j) {
1480 free(destinations[j]);
1481 }
1482 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001483 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001484 }
1485
1486 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001487 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001488 if (num_destinations) {
1489 size_t i;
1490 rc = run_setfiles_command(c, config, destinations, num_destinations);
1491 for (i = 0; i < num_destinations; ++i) {
1492 free(destinations[i]);
1493 }
1494 }
1495 free(destinations);
1496 if (rc)
1497 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001498
Chinyue Chenfac909e2016-06-24 14:17:42 +08001499 /* Setup CPU cgroup params. */
1500 if (config->cpu_cgparams.shares) {
1501 rc = c->cgroup->ops->set_cpu_shares(
1502 c->cgroup, config->cpu_cgparams.shares);
1503 if (rc)
1504 goto error_rmdir;
1505 }
1506 if (config->cpu_cgparams.period) {
1507 rc = c->cgroup->ops->set_cpu_quota(
1508 c->cgroup, config->cpu_cgparams.quota);
1509 if (rc)
1510 goto error_rmdir;
1511 rc = c->cgroup->ops->set_cpu_period(
1512 c->cgroup, config->cpu_cgparams.period);
1513 if (rc)
1514 goto error_rmdir;
1515 }
1516 if (config->cpu_cgparams.rt_period) {
1517 rc = c->cgroup->ops->set_cpu_rt_runtime(
1518 c->cgroup, config->cpu_cgparams.rt_runtime);
1519 if (rc)
1520 goto error_rmdir;
1521 rc = c->cgroup->ops->set_cpu_rt_period(
1522 c->cgroup, config->cpu_cgparams.rt_period);
1523 if (rc)
1524 goto error_rmdir;
1525 }
1526
Dylan Reid837c74a2016-01-22 17:25:21 -08001527 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001528 if (config->pid_file_path) {
1529 c->pid_file_path = strdup(config->pid_file_path);
1530 if (!c->pid_file_path) {
1531 rc = -ENOMEM;
1532 goto error_rmdir;
1533 }
1534 } else if (c->runfs) {
1535 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1536 rc = -ENOMEM;
1537 goto error_rmdir;
1538 }
1539 }
1540
1541 if (c->pid_file_path)
1542 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001543 minijail_reset_signal_mask(c->jail);
1544
1545 /* Setup container namespaces. */
1546 minijail_namespace_ipc(c->jail);
1547 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001548 if (!config->share_host_netns)
1549 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001550 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001551 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001552 if (getuid() != 0)
1553 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001554 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001555 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001556 if (rc)
1557 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001558 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001559 if (rc)
1560 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001561
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001562 /* Set the UID/GID inside the container if not 0. */
Stephen Barber1a398c72017-01-23 12:39:44 -08001563 if (get_userns_outside_id(config->uid_map, config->uid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001564 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001565 else if (config->uid > 0)
1566 minijail_change_uid(c->jail, config->uid);
1567 if (get_userns_outside_id(config->gid_map, config->gid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001568 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001569 else if (config->gid > 0)
1570 minijail_change_gid(c->jail, config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001571
Dylan Reid837c74a2016-01-22 17:25:21 -08001572 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1573 if (rc)
1574 goto error_rmdir;
1575
1576 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001577 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1578 if (c->cgroup->cgroup_tasks_paths[i]) {
1579 rc = minijail_add_to_cgroup(c->jail,
1580 c->cgroup->cgroup_tasks_paths[i]);
1581 if (rc)
1582 goto error_rmdir;
1583 }
1584 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001585
Dylan Reide040c6b2016-05-02 18:49:02 -07001586 if (config->alt_syscall_table)
1587 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001588
Dylan Reid93fa4602017-06-06 13:39:31 -07001589 for (i = 0; i < config->num_rlimits; i++) {
1590 const struct container_rlimit *lim = &config->rlimits[i];
1591 rc = minijail_rlimit(c->jail, lim->type, lim->cur,
1592 lim->max);
1593 if (rc)
1594 goto error_rmdir;
1595 }
1596
1597
Dylan Reid837c74a2016-01-22 17:25:21 -08001598 minijail_run_as_init(c->jail);
1599
Dylan Reid3da683b2016-04-05 03:35:35 -07001600 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1601 minijail_skip_remount_private(c->jail);
1602
Dylan Reidc4335842016-11-11 10:24:52 -08001603 if (!config->keep_fds_open)
1604 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001605
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001606 if (config->use_capmask) {
1607 minijail_use_caps(c->jail, config->capmask);
1608 if (config->use_capmask_ambient) {
1609 minijail_set_ambient_caps(c->jail);
1610 }
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001611 if (config->securebits_skip_mask) {
1612 minijail_skip_setting_securebits(c->jail,
1613 config->securebits_skip_mask);
1614 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001615 }
1616
Dylan Reid837c74a2016-01-22 17:25:21 -08001617 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001618 config->program_argv[0],
1619 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001620 &c->init_pid, NULL, NULL,
1621 NULL);
1622 if (rc)
1623 goto error_rmdir;
1624 return 0;
1625
1626error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001627 if (!rc)
1628 rc = -errno;
1629 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001630 return rc;
1631}
1632
1633const char *container_root(struct container *c)
1634{
1635 return c->runfs;
1636}
1637
1638int container_pid(struct container *c)
1639{
1640 return c->init_pid;
1641}
1642
1643static int container_teardown(struct container *c)
1644{
Dylan Reid837c74a2016-01-22 17:25:21 -08001645 int ret = 0;
1646
Dylan Reide040c6b2016-05-02 18:49:02 -07001647 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001648 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001649 if (umount(c->runfsroot))
1650 ret = -errno;
1651 if (rmdir(c->runfsroot))
1652 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001653 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001654 }
1655 if (c->pid_file_path) {
1656 if (unlink(c->pid_file_path))
1657 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001658 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001659 }
1660 if (c->runfs) {
1661 if (rmdir(c->runfs))
1662 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001663 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001664 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001665 return ret;
1666}
1667
1668int container_wait(struct container *c)
1669{
Dylan Reidcf745c52016-04-22 10:18:03 -07001670 int rc;
1671
1672 do {
1673 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001674 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001675
Luis Hector Chavez945af482016-06-03 08:39:34 -07001676 // If the process had already been reaped, still perform teardown.
1677 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001678 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001679 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001680 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001681}
1682
1683int container_kill(struct container *c)
1684{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001685 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001686 return -errno;
1687 return container_wait(c);
1688}