/* Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
Mike Frysinger05e594e2017-01-10 02:11:08 -050010#if USE_device_mapper
11#include <libdevmapper.h>
12#endif
Dylan Reid837c74a2016-01-22 17:25:21 -080013#include <malloc.h>
14#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -070015#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
Luis Hector Chavez15e8e672017-07-20 15:13:27 -070019#include <syscall.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080020#include <sys/mount.h>
21#include <sys/stat.h>
22#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070023#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080024#include <unistd.h>
25
Mike Frysinger412dbd22017-01-06 01:50:34 -050026#include <linux/loop.h>
27
Dylan Reid837c74a2016-01-22 17:25:21 -080028#include "container_cgroup.h"
29#include "libcontainer.h"
30#include "libminijail.h"
31
/*
 * Releases |ptr| with free() and then stores NULL in it, so the stale address
 * cannot be freed or dereferenced a second time. |ptr| must be a modifiable
 * lvalue; it is evaluated twice.
 */
#define FREE_AND_NULL(ptr)	\
	do {			\
		free(ptr);	\
		(ptr) = NULL;	\
	} while (0)
37
Yusuke Sato91f11f02016-12-02 16:15:13 -080038#define MAX_NUM_SETFILES_ARGS 128
Dylan Reid93fa4602017-06-06 13:39:31 -070039#define MAX_RLIMITS 32 // Linux defines 15 at the time of writing.
Yusuke Sato91f11f02016-12-02 16:15:13 -080040
Mike Frysinger412dbd22017-01-06 01:50:34 -050041static const char loopdev_ctl[] = "/dev/loop-control";
Mike Frysinger05e594e2017-01-10 02:11:08 -050042#if USE_device_mapper
43static const char dm_dev_prefix[] = "/dev/mapper/";
44#endif
Mike Frysinger412dbd22017-01-06 01:50:34 -050045
Luis Hector Chavez945af482016-06-03 08:39:34 -070046static int container_teardown(struct container *c);
47
/*
 * Replaces the string owned by |*dest| with a newly allocated copy of |src|.
 *
 * The copy is made before the previous value is released, so |*dest| is left
 * untouched whenever the function fails.
 *
 * Returns 0 on success, -EINVAL if |dest| or |src| is NULL, or -ENOMEM if the
 * copy cannot be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behavior, so reject it explicitly. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;

	free(*dest);
	*dest = copy;
	return 0;
}
58
/*
 * One mount requested through container_config_add_mount(). All strings are
 * private copies owned by the config; |data| and |verity| may be NULL.
 */
struct container_mount {
	char *name;		/* Caller-supplied name of this mount */
	char *source;		/* What to mount */
	char *destination;	/* Where to mount it */
	char *type;		/* Filesystem type */
	char *data;		/* Optional mount data, may be NULL */
	char *verity;		/* Optional verity argument, may be NULL */
	int flags;		/* Mount flags as supplied by the caller */
	int uid;		/* Owner (inside the userns) of a created target */
	int gid;		/* Group (inside the userns) of a created target */
	int mode;		/* Permission bits of a created target */
	int mount_in_ns;	/* True if mount should happen in new vfs ns */
	int create;		/* True if target should be created if missing */
	int loopback;		/* True if target should be mounted via loopback */
};
74
/*
 * One device node requested through container_config_add_device().
 * |path| is a private copy owned by the config.
 */
struct container_device {
	char type;		/* 'c' or 'b' for char or block */
	char *path;		/* Path of the device node */
	int fs_permissions;	/* Permission bits for the node */
	int major;		/* Device major number */
	int minor;		/* Device minor number, -1 when |copy_minor| */
	int copy_minor;		/* Copy minor from existing node, ignores |minor| */
	int uid;		/* Owner of the node */
	int gid;		/* Group of the node */
};
85
/*
 * One device-cgroup rule recorded by container_config_add_cgroup_device().
 */
struct container_cgroup_device {
	int allow;	/* Rule polarity; add_device() records 1 here */
	char type;	/* Device type character supplied by the caller */
	int major;	/* -1 means all */
	int minor;	/* -1 means all */
	int read;	/* Non-zero if the rule covers read access */
	int write;	/* Non-zero if the rule covers write access */
	int modify;	/* Non-zero if the rule covers modify access */
};
95
/*
 * CPU cgroup parameters. Each value stays 0 until the matching
 * container_config_set_cpu_*() call stores a validated value.
 */
struct container_cpu_cgroup {
	int shares;	/* CPU shares; the setter requires >= 2 */
	int quota;	/* CFS quota; the setter requires > 0 or exactly -1 */
	int period;	/* CFS period; the setter requires > 0 */
	int rt_runtime;	/* Realtime runtime; >= 0 and below |rt_period| */
	int rt_period;	/* Realtime period */
};
103
/* One resource limit recorded by container_config_add_rlimit(). */
struct container_rlimit {
	int type;	/* Resource identifier supplied by the caller */
	uint32_t cur;	/* Current (soft) value supplied by the caller */
	uint32_t max;	/* Maximum (hard) value supplied by the caller */
};
109
Dylan Reid837c74a2016-01-22 17:25:21 -0800110/*
111 * Structure that configures how the container is run.
112 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500113 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -0800114 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700115 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700116 * premounted_runfs - Path to where the container will be run.
117 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -0800118 * program_argv - The program to run and args, e.g. "/sbin/init".
119 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -0700120 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800121 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -0700122 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800123 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
124 * alt_syscall_table - Syscall table to use or NULL if none.
125 * mounts - Filesystems to mount in the new namespace.
126 * num_mounts - Number of above.
127 * devices - Device nodes to create.
128 * num_devices - Number of above.
Dylan Reid4843d6b2017-03-31 18:14:30 -0700129 * cgroup_devices - Device node cgroup permissions.
130 * num_cgroup_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700131 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800132 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700133 * cgroup_parent - Parent dir for cgroup creation
134 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700135 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700136 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800137 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid93fa4602017-06-06 13:39:31 -0700138 * rlimits - Array of rlimits for the contained process.
139 * num_rlimits - The number of elements in `rlimits`.
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700140 * securebits_skip_mask - The mask of securebits to skip when restricting caps.
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700141 * do_init - Whether the container needs an extra process to be run as init.
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700142 * selinux_context - The SELinux context name the container will run under.
Dylan Reid837c74a2016-01-22 17:25:21 -0800143 */
144struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500145 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800146 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700147 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700148 char *premounted_runfs;
149 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800150 char **program_argv;
151 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700152 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800153 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700154 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800155 char *gid_map;
156 char *alt_syscall_table;
157 struct container_mount *mounts;
158 size_t num_mounts;
159 struct container_device *devices;
160 size_t num_devices;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700161 struct container_cgroup_device *cgroup_devices;
162 size_t num_cgroup_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700163 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800164 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700165 char *cgroup_parent;
166 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700167 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700168 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800169 int keep_fds_open;
Dylan Reid93fa4602017-06-06 13:39:31 -0700170 struct container_rlimit rlimits[MAX_RLIMITS];
171 int num_rlimits;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700172 int use_capmask;
173 int use_capmask_ambient;
174 uint64_t capmask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700175 uint64_t securebits_skip_mask;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700176 int do_init;
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700177 char *selinux_context;
Dylan Reid837c74a2016-01-22 17:25:21 -0800178};
179
180struct container_config *container_config_create()
181{
182 return calloc(1, sizeof(struct container_config));
183}
184
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700185static void container_free_program_args(struct container_config *c)
186{
187 int i;
188
189 if (!c->program_argv)
190 return;
191 for (i = 0; i < c->num_args; ++i) {
192 FREE_AND_NULL(c->program_argv[i]);
193 }
194 FREE_AND_NULL(c->program_argv);
195}
196
197static void container_config_free_mount(struct container_mount *mount)
198{
199 FREE_AND_NULL(mount->name);
200 FREE_AND_NULL(mount->source);
201 FREE_AND_NULL(mount->destination);
202 FREE_AND_NULL(mount->type);
203 FREE_AND_NULL(mount->data);
204}
205
206static void container_config_free_device(struct container_device *device)
207{
208 FREE_AND_NULL(device->path);
209}
210
Dylan Reid837c74a2016-01-22 17:25:21 -0800211void container_config_destroy(struct container_config *c)
212{
213 size_t i;
214
215 if (c == NULL)
216 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700217 FREE_AND_NULL(c->rootfs);
218 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700219 FREE_AND_NULL(c->premounted_runfs);
220 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700221 FREE_AND_NULL(c->uid_map);
222 FREE_AND_NULL(c->gid_map);
223 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800224 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700225 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800226 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700227 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800228 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700229 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800230 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700231 FREE_AND_NULL(c->devices);
Dylan Reida34f8162017-05-10 11:33:11 -0700232 FREE_AND_NULL(c->cgroup_devices);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700233 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700234 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700235 FREE_AND_NULL(c->selinux_context);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700236 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800237}
238
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500239int container_config_config_root(struct container_config *c,
240 const char *config_root)
241{
242 return strdup_and_free(&c->config_root, config_root);
243}
244
245const char *container_config_get_config_root(const struct container_config *c)
246{
247 return c->config_root;
248}
249
Dylan Reid837c74a2016-01-22 17:25:21 -0800250int container_config_rootfs(struct container_config *c, const char *rootfs)
251{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700252 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800253}
254
Dylan Reid11456722016-05-02 11:24:50 -0700255const char *container_config_get_rootfs(const struct container_config *c)
256{
257 return c->rootfs;
258}
259
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700260void container_config_rootfs_mount_flags(struct container_config *c,
261 unsigned long rootfs_mount_flags)
262{
263 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
264 * simply check against zero later. MS_BIND is also added to avoid
265 * re-mounting the original filesystem, since the rootfs is always
266 * bind-mounted.
267 */
268 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
269}
270
271unsigned long container_config_get_rootfs_mount_flags(
272 const struct container_config *c)
273{
274 return c->rootfs_mount_flags;
275}
276
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700277int container_config_premounted_runfs(struct container_config *c, const char *runfs)
278{
279 return strdup_and_free(&c->premounted_runfs, runfs);
280}
281
282const char *container_config_get_premounted_runfs(const struct container_config *c)
283{
284 return c->premounted_runfs;
285}
286
287int container_config_pid_file(struct container_config *c, const char *path)
288{
289 return strdup_and_free(&c->pid_file_path, path);
290}
291
292const char *container_config_get_pid_file(const struct container_config *c)
293{
294 return c->pid_file_path;
295}
296
Dylan Reid837c74a2016-01-22 17:25:21 -0800297int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800298 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800299{
300 size_t i;
301
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700302 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800303 c->num_args = num_args;
304 c->program_argv = calloc(num_args + 1, sizeof(char *));
305 if (!c->program_argv)
306 return -ENOMEM;
307 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700308 if (strdup_and_free(&c->program_argv[i], argv[i]))
309 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800310 }
311 c->program_argv[num_args] = NULL;
312 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700313
314error_free_return:
315 container_free_program_args(c);
316 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800317}
318
Dylan Reid11456722016-05-02 11:24:50 -0700319size_t container_config_get_num_program_args(const struct container_config *c)
320{
321 return c->num_args;
322}
323
324const char *container_config_get_program_arg(const struct container_config *c,
325 size_t index)
326{
327 if (index >= c->num_args)
328 return NULL;
329 return c->program_argv[index];
330}
331
Dylan Reid1874feb2016-06-22 17:53:50 -0700332void container_config_uid(struct container_config *c, uid_t uid)
333{
334 c->uid = uid;
335}
336
337uid_t container_config_get_uid(const struct container_config *c)
338{
339 return c->uid;
340}
341
Dylan Reid837c74a2016-01-22 17:25:21 -0800342int container_config_uid_map(struct container_config *c, const char *uid_map)
343{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700344 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800345}
346
Dylan Reid1874feb2016-06-22 17:53:50 -0700347void container_config_gid(struct container_config *c, gid_t gid)
348{
349 c->gid = gid;
350}
351
352gid_t container_config_get_gid(const struct container_config *c)
353{
354 return c->gid;
355}
356
Dylan Reid837c74a2016-01-22 17:25:21 -0800357int container_config_gid_map(struct container_config *c, const char *gid_map)
358{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700359 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800360}
361
362int container_config_alt_syscall_table(struct container_config *c,
363 const char *alt_syscall_table)
364{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700365 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800366}
367
Dylan Reid93fa4602017-06-06 13:39:31 -0700368int container_config_add_rlimit(struct container_config *c, int type,
369 uint32_t cur, uint32_t max)
370{
371 if (c->num_rlimits >= MAX_RLIMITS) {
372 return -ENOMEM;
373 }
374 c->rlimits[c->num_rlimits].type = type;
375 c->rlimits[c->num_rlimits].cur = cur;
376 c->rlimits[c->num_rlimits].max = max;
377 c->num_rlimits++;
378 return 0;
379}
380
Dylan Reid837c74a2016-01-22 17:25:21 -0800381int container_config_add_mount(struct container_config *c,
382 const char *name,
383 const char *source,
384 const char *destination,
385 const char *type,
386 const char *data,
Mike Frysinger05e594e2017-01-10 02:11:08 -0500387 const char *verity,
Dylan Reid837c74a2016-01-22 17:25:21 -0800388 int flags,
389 int uid,
390 int gid,
391 int mode,
392 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500393 int create,
394 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800395{
396 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700397 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800398
399 if (name == NULL || source == NULL ||
400 destination == NULL || type == NULL)
401 return -EINVAL;
402
403 mount_ptr = realloc(c->mounts,
404 sizeof(c->mounts[0]) * (c->num_mounts + 1));
405 if (!mount_ptr)
406 return -ENOMEM;
407 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700408 current_mount = &c->mounts[c->num_mounts];
409 memset(current_mount, 0, sizeof(struct container_mount));
410
411 if (strdup_and_free(&current_mount->name, name))
412 goto error_free_return;
413 if (strdup_and_free(&current_mount->source, source))
414 goto error_free_return;
415 if (strdup_and_free(&current_mount->destination, destination))
416 goto error_free_return;
417 if (strdup_and_free(&current_mount->type, type))
418 goto error_free_return;
419 if (data && strdup_and_free(&current_mount->data, data))
420 goto error_free_return;
Mike Frysinger05e594e2017-01-10 02:11:08 -0500421 if (verity && strdup_and_free(&current_mount->verity, verity))
422 goto error_free_return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700423 current_mount->flags = flags;
424 current_mount->uid = uid;
425 current_mount->gid = gid;
426 current_mount->mode = mode;
427 current_mount->mount_in_ns = mount_in_ns;
428 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500429 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800430 ++c->num_mounts;
431 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700432
433error_free_return:
434 container_config_free_mount(current_mount);
435 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800436}
437
Dylan Reid4843d6b2017-03-31 18:14:30 -0700438int container_config_add_cgroup_device(struct container_config *c,
439 int allow,
440 char type,
441 int major,
442 int minor,
443 int read,
444 int write,
445 int modify)
446{
447 struct container_cgroup_device *dev_ptr;
448 struct container_cgroup_device *current_dev;
449
450 dev_ptr = realloc(c->cgroup_devices,
451 sizeof(c->cgroup_devices[0]) *
452 (c->num_cgroup_devices + 1));
453 if (!dev_ptr)
454 return -ENOMEM;
455 c->cgroup_devices = dev_ptr;
456
457 current_dev = &c->cgroup_devices[c->num_cgroup_devices];
458 memset(current_dev, 0, sizeof(struct container_cgroup_device));
459 current_dev->allow = allow;
460 current_dev->type = type;
461 current_dev->major = major;
462 current_dev->minor = minor;
463 current_dev->read = read;
464 current_dev->write = write;
465 current_dev->modify = modify;
466 ++c->num_cgroup_devices;
467
468 return 0;
469}
470
Dylan Reid837c74a2016-01-22 17:25:21 -0800471int container_config_add_device(struct container_config *c,
472 char type,
473 const char *path,
474 int fs_permissions,
475 int major,
476 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700477 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800478 int uid,
479 int gid,
480 int read_allowed,
481 int write_allowed,
482 int modify_allowed)
483{
484 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700485 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800486
487 if (path == NULL)
488 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700489 /* If using a dynamic minor number, ensure that minor is -1. */
490 if (copy_minor && (minor != -1))
491 return -EINVAL;
492
Dylan Reid837c74a2016-01-22 17:25:21 -0800493 dev_ptr = realloc(c->devices,
494 sizeof(c->devices[0]) * (c->num_devices + 1));
495 if (!dev_ptr)
496 return -ENOMEM;
497 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700498 current_dev = &c->devices[c->num_devices];
499 memset(current_dev, 0, sizeof(struct container_device));
500
501 current_dev->type = type;
502 if (strdup_and_free(&current_dev->path, path))
503 goto error_free_return;
504 current_dev->fs_permissions = fs_permissions;
505 current_dev->major = major;
506 current_dev->minor = minor;
507 current_dev->copy_minor = copy_minor;
508 current_dev->uid = uid;
509 current_dev->gid = gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700510 if (read_allowed || write_allowed || modify_allowed) {
511 if (container_config_add_cgroup_device(c,
512 1,
513 type,
514 major,
515 minor,
516 read_allowed,
517 write_allowed,
518 modify_allowed))
519 goto error_free_return;
520 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800521 ++c->num_devices;
522 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700523
524error_free_return:
525 container_config_free_device(current_dev);
526 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800527}
528
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700529int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700530 const char *setfiles_cmd)
531{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700532 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700533}
Dylan Reid837c74a2016-01-22 17:25:21 -0800534
Dylan Reid11456722016-05-02 11:24:50 -0700535const char *container_config_get_run_setfiles(const struct container_config *c)
536{
537 return c->run_setfiles;
538}
539
Chinyue Chenfac909e2016-06-24 14:17:42 +0800540int container_config_set_cpu_shares(struct container_config *c, int shares)
541{
542 /* CPU shares must be 2 or higher. */
543 if (shares < 2)
544 return -EINVAL;
545
546 c->cpu_cgparams.shares = shares;
547 return 0;
548}
549
550int container_config_set_cpu_cfs_params(struct container_config *c,
551 int quota,
552 int period)
553{
554 /*
555 * quota could be set higher than period to utilize more than one CPU.
556 * quota could also be set as -1 to indicate the cgroup does not adhere
557 * to any CPU time restrictions.
558 */
559 if (quota <= 0 && quota != -1)
560 return -EINVAL;
561 if (period <= 0)
562 return -EINVAL;
563
564 c->cpu_cgparams.quota = quota;
565 c->cpu_cgparams.period = period;
566 return 0;
567}
568
569int container_config_set_cpu_rt_params(struct container_config *c,
570 int rt_runtime,
571 int rt_period)
572{
573 /*
574 * rt_runtime could be set as 0 to prevent the cgroup from using
575 * realtime CPU.
576 */
577 if (rt_runtime < 0 || rt_runtime >= rt_period)
578 return -EINVAL;
579
580 c->cpu_cgparams.rt_runtime = rt_runtime;
581 c->cpu_cgparams.rt_period = rt_period;
582 return 0;
583}
584
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800585int container_config_get_cpu_shares(struct container_config *c)
586{
587 return c->cpu_cgparams.shares;
588}
589
590int container_config_get_cpu_quota(struct container_config *c)
591{
592 return c->cpu_cgparams.quota;
593}
594
595int container_config_get_cpu_period(struct container_config *c)
596{
597 return c->cpu_cgparams.period;
598}
599
600int container_config_get_cpu_rt_runtime(struct container_config *c)
601{
602 return c->cpu_cgparams.rt_runtime;
603}
604
605int container_config_get_cpu_rt_period(struct container_config *c)
606{
607 return c->cpu_cgparams.rt_period;
608}
609
Dylan Reid9e724af2016-07-21 09:58:07 -0700610int container_config_set_cgroup_parent(struct container_config *c,
611 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700612 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700613{
614 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700615 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700616 return strdup_and_free(&c->cgroup_parent, parent);
617}
618
619const char *container_config_get_cgroup_parent(struct container_config *c)
620{
621 return c->cgroup_parent;
622}
623
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700624void container_config_share_host_netns(struct container_config *c)
625{
626 c->share_host_netns = 1;
627}
628
629int get_container_config_share_host_netns(struct container_config *c)
630{
631 return c->share_host_netns;
632}
633
Dylan Reidc4335842016-11-11 10:24:52 -0800634void container_config_keep_fds_open(struct container_config *c)
635{
636 c->keep_fds_open = 1;
637}
638
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700639void container_config_set_capmask(struct container_config *c,
640 uint64_t capmask,
641 int ambient)
642{
643 c->use_capmask = 1;
644 c->capmask = capmask;
645 c->use_capmask_ambient = ambient;
646}
647
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700648void container_config_set_securebits_skip_mask(struct container_config *c,
649 uint64_t securebits_skip_mask)
650{
651 c->securebits_skip_mask = securebits_skip_mask;
652}
653
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700654void container_config_set_run_as_init(struct container_config *c,
655 int run_as_init)
656{
657 c->do_init = !run_as_init;
658}
659
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700660int container_config_set_selinux_context(struct container_config *c,
661 const char *context)
662{
663 if (!context)
664 return -EINVAL;
665 c->selinux_context = strdup(context);
666 if (c->selinux_context)
667 return -ENOMEM;
668 return 0;
669}
670
/*
 * Container manipulation
 */

/*
 * Runtime state of one container. |name| and |rundir| are copied in by
 * container_new(); container_destroy() releases |cgroup|, |jail|,
 * |config_root|, |name| and |rundir|.
 */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *config_root;
	char *runfs;
	char *rundir;
	char *runfsroot;	/* Passed to setfiles as its "-r" root */
	char *pid_file_path;
	char **ext_mounts;	/* Mounts made outside of the minijail */
	size_t num_ext_mounts;
	char **loopdevs;
	size_t num_loopdevs;
	char **device_mappers;
	size_t num_device_mappers;
	char *name;
};
691
692struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700693 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800694{
695 struct container *c;
696
Dylan Reid837c74a2016-01-22 17:25:21 -0800697 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700698 if (!c)
699 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800700 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700701 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700702 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700703 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800704 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700705 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800706 return c;
707}
708
709void container_destroy(struct container *c)
710{
Dylan Reid684975e2016-05-02 15:44:47 -0700711 if (c->cgroup)
712 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700713 if (c->jail)
714 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500715 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700716 FREE_AND_NULL(c->name);
717 FREE_AND_NULL(c->rundir);
718 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800719}
720
/*
 * Parses one non-negative decimal field of a uid/gid map at |*cursor|.
 *
 * On success stores the value in |*value_out|, advances |*cursor| past the
 * digits and returns 0. Returns -EINVAL if no number is present or the value
 * is negative or above INT32_MAX, or -ERANGE if strtol() overflows.
 */
static int parse_userns_map_field(const char **cursor, int *value_out)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(*cursor, &end, 10);
	if (end == *cursor)
		return -EINVAL;
	if (errno)
		return -errno;
	/* Bound the value so it fits an int without truncation. */
	if (value < 0 || value > INT32_MAX)
		return -EINVAL;

	*value_out = (int)value;
	*cursor = end;
	return 0;
}

/*
 * Given a uid/gid map of "inside1 outside1 length1, ...", and an id
 * inside of the user namespace, return the equivalent outside id, or
 * return < 0 on error.
 *
 * A mapping covers the ids inside .. inside + length - 1. Returns -EINVAL
 * if |map| is NULL or malformed, or if no mapping contains |id|.
 */
static int get_userns_outside_id(const char *map, int id)
{
	const char *cursor = map;

	if (!map)
		return -EINVAL;

	while (*cursor) {
		int inside, outside, length;
		int offset;
		int rc;

		/* Skip separators and empty mappings. */
		while (*cursor == ',' || *cursor == ' ')
			++cursor;
		if (!*cursor)
			break;

		rc = parse_userns_map_field(&cursor, &inside);
		if (rc)
			return rc;
		rc = parse_userns_map_field(&cursor, &outside);
		if (rc)
			return rc;
		rc = parse_userns_map_field(&cursor, &length);
		if (rc)
			return rc;

		/*
		 * Comparing the offset against |length| (rather than |id|
		 * against inside + length) excludes the one-past-the-end id
		 * and cannot overflow.
		 */
		if (id >= inside && id - inside < length) {
			offset = id - inside;
			if (outside > INT32_MAX - offset)
				return -EINVAL;
			return outside + offset;
		}

		/* Ignore anything left in this mapping up to the next ','. */
		cursor += strcspn(cursor, ",");
	}

	return -EINVAL;
}
763
/*
 * Creates directory |path|, then applies |mode| with chmod() and hands the
 * directory to |uid|:|gid| with chown().
 *
 * Returns 0 on success or the negated errno of the first call that fails;
 * later steps are skipped in that case.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* || short-circuits, so errno belongs to the call that failed. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
774
/*
 * Ensures file |path| exists (creating it with |mode| if needed) and hands
 * it to |uid|:|gid|.
 *
 * Returns 0 on success, or the negated errno of the failing open() or
 * fchown() call.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;

	/* Capture errno now; close() below is allowed to overwrite it. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);

	return rc;
}
788
789/* Make sure the mount target exists in the new rootfs. Create if needed and
790 * possible.
791 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800792static int setup_mount_destination(const struct container_config *config,
793 const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700794 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800795 const char *dest)
796{
Stephen Barber1a398c72017-01-23 12:39:44 -0800797 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -0800798 int rc;
799 struct stat st_buf;
800
801 rc = stat(dest, &st_buf);
802 if (rc == 0) /* destination exists */
803 return 0;
804
805 /* Try to create the destination. Either make directory or touch a file
806 * depending on the source type.
807 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800808 uid_userns = get_userns_outside_id(config->uid_map, mnt->uid);
809 if (uid_userns < 0)
810 return uid_userns;
811 gid_userns = get_userns_outside_id(config->gid_map, mnt->gid);
812 if (gid_userns < 0)
813 return gid_userns;
814
Dylan Reid2149be92016-04-28 18:38:57 -0700815 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800816 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
Stephen Barber1a398c72017-01-23 12:39:44 -0800817 return make_dir(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800818
Stephen Barber1a398c72017-01-23 12:39:44 -0800819 return touch_file(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800820}
821
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700822/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700823static int run_setfiles_command(const struct container *c,
824 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800825 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700826{
827 int rc;
828 int status;
829 int pid;
830 char *context_path;
831
Dylan Reide040c6b2016-05-02 18:49:02 -0700832 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700833 return 0;
834
835 if (asprintf(&context_path, "%s/file_contexts",
836 c->runfsroot) < 0)
837 return -errno;
838
839 pid = fork();
840 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800841 size_t i;
842 size_t arg_index = 0;
843 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700844 const char *env[] = {
845 NULL,
846 };
847
Yusuke Sato91f11f02016-12-02 16:15:13 -0800848 argv[arg_index++] = config->run_setfiles;
849 argv[arg_index++] = "-r";
850 argv[arg_index++] = c->runfsroot;
851 argv[arg_index++] = context_path;
852 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
853 _exit(-E2BIG);
854 for (i = 0; i < num_destinations; ++i) {
855 argv[arg_index++] = destinations[i];
856 }
857 argv[arg_index] = NULL;
858
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700859 execve(argv[0], (char *const*)argv, (char *const*)env);
860
861 /* Command failed to exec if execve returns. */
862 _exit(-errno);
863 }
864 free(context_path);
865 if (pid < 0)
866 return -errno;
867 do {
868 rc = waitpid(pid, &status, 0);
869 } while (rc == -1 && errno == EINTR);
870 if (rc < 0)
871 return -errno;
872 return status;
873}
874
Mike Frysinger412dbd22017-01-06 01:50:34 -0500875/* Find a free loop device and attach it. */
876static int loopdev_setup(char **loopdev_ret, const char *source)
877{
878 int ret = 0;
879 int source_fd = -1;
880 int control_fd = -1;
881 int loop_fd = -1;
882 char *loopdev = NULL;
883
884 source_fd = open(source, O_RDONLY|O_CLOEXEC);
885 if (source_fd < 0)
886 goto error;
887
888 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
889 if (control_fd < 0)
890 goto error;
891
892 while (1) {
893 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
894 if (num < 0)
895 goto error;
896
897 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
898 goto error;
899
900 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
901 if (loop_fd < 0)
902 goto error;
903
904 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
905 break;
906
907 if (errno != EBUSY)
908 goto error;
909
910 /* Clean up resources for the next pass. */
911 free(loopdev);
912 close(loop_fd);
913 }
914
915 *loopdev_ret = loopdev;
916 goto exit;
917
918error:
919 ret = -errno;
920 free(loopdev);
921exit:
922 if (source_fd != -1)
923 close(source_fd);
924 if (control_fd != -1)
925 close(control_fd);
926 if (loop_fd != -1)
927 close(loop_fd);
928 return ret;
929}
930
/*
 * Detach the specified loop device.
 *
 * Opens |loopdev| and issues LOOP_CLR_FD on it.  Returns 0 on success or a
 * negative errno value if either the open or the ioctl fails.
 */
static int loopdev_detach(const char *loopdev)
{
	int status = 0;
	int loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);

	if (loop_fd < 0)
		return -errno;

	/* Record the error first; close() below must not overwrite it. */
	if (ioctl(loop_fd, LOOP_CLR_FD) < 0)
		status = -errno;

	close(loop_fd);
	return status;
}
952
/*
 * Create a new device mapper target for the source.
 *
 * The target is named "cros-containers-<source>" with every '/' replaced by
 * '_'.  Each "@DEV@" token in |verity_cmdline| is replaced with |source|; the
 * resulting string must start with "<start> <size> <target-type>", and the
 * remainder is passed to the target as its parameters.  The device is created
 * read-only.
 *
 * On success returns 0 and stores malloc()ed strings in |*dm_path_ret| (the
 * device node path under dm_dev_prefix) and |*dm_name_ret| (the target name);
 * the caller owns both.  On failure returns a negative errno value and leaves
 * both outputs untouched.
 *
 * NOTE(review): when built without USE_device_mapper this returns 0 without
 * setting either output; callers must not rely on them in that case.
 */
static int dm_setup(char **dm_path_ret, char **dm_name_ret, const char *source,
		    const char *verity_cmdline)
{
	int ret = 0;
#if USE_device_mapper
	static const char dev_token[] = "@DEV@";
	const size_t token_len = sizeof(dev_token) - 1;
	char *p;
	const char *scan;
	char *dm_path = NULL;
	char *dm_name = NULL;
	char *verity = NULL;
	struct dm_task *dmt = NULL;
	uint32_t cookie = 0;
	size_t source_len;
	size_t num_tokens = 0;
	char ttype[20];
	unsigned long long start, size;
	int n;

	/* Normalize the name into something unique-esque. */
	if (asprintf(&dm_name, "cros-containers-%s", source) < 0) {
		/* The pointer is undefined after a failed asprintf. */
		dm_name = NULL;
		goto error;
	}
	p = dm_name;
	while ((p = strchr(p, '/')) != NULL)
		*p++ = '_';

	/* Get the /dev path for the higher levels to mount. */
	if (asprintf(&dm_path, "%s%s", dm_dev_prefix, dm_name) < 0) {
		dm_path = NULL;
		goto error;
	}

	/*
	 * Insert the source path in the verity command line.  Count the
	 * tokens first so the buffer is large enough however many there are.
	 */
	source_len = strlen(source);
	for (scan = verity_cmdline; (scan = strstr(scan, dev_token)) != NULL;
	     scan += token_len)
		num_tokens++;
	verity = malloc(strlen(verity_cmdline) + num_tokens * source_len + 1);
	if (verity == NULL) {
		errno = ENOMEM;
		goto error;
	}
	strcpy(verity, verity_cmdline);
	p = verity;
	while ((p = strstr(p, dev_token)) != NULL) {
		memmove(p + source_len, p + token_len,
			strlen(p + token_len) + 1);
		memcpy(p, source, source_len);
		/*
		 * Continue after the inserted text so a source path that
		 * itself contains the token is not expanded again.
		 */
		p += source_len;
	}

	/* Extract the first three parameters for dm-verity settings. */
	if (sscanf(verity, "%llu %llu %10s %n", &start, &size, ttype, &n) != 3) {
		/* sscanf does not set errno for a malformed string. */
		errno = EINVAL;
		goto error;
	}

	/* Finally create the device mapper. */
	dmt = dm_task_create(DM_DEVICE_CREATE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_set_ro(dmt))
		goto error;

	if (!dm_task_add_target(dmt, start, size, ttype, verity + n))
		goto error;

	if (!dm_task_set_cookie(dmt, &cookie, 0))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	/* Make sure the node exists before we continue. */
	dm_udev_wait(cookie);

	*dm_path_ret = dm_path;
	*dm_name_ret = dm_name;
	goto exit;

error:
	/* Never report success on a failure path, even if errno is unset. */
	ret = errno ? -errno : -EIO;
	free(dm_name);
	free(dm_path);
exit:
	free(verity);
	if (dmt)
		dm_task_destroy(dmt);
#endif
	return ret;
}
1031
/*
 * Tear down the device mapper target.
 *
 * Runs a DM_DEVICE_REMOVE task for the target called |dm_name|.  Returns 0 on
 * success or a negative errno value on failure.  When built without
 * USE_device_mapper this is a no-op that returns 0.
 */
static int dm_detach(const char *dm_name)
{
	int ret = 0;
#if USE_device_mapper
	struct dm_task *dmt;

	dmt = dm_task_create(DM_DEVICE_REMOVE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	goto exit;

error:
	/* Never report success on a failure path, even if errno is unset. */
	ret = errno ? -errno : -EIO;
exit:
	/* dmt is NULL when dm_task_create() itself failed. */
	if (dmt)
		dm_task_destroy(dmt);
#endif
	return ret;
}
1058
Dylan Reide040c6b2016-05-02 18:49:02 -07001059/*
1060 * Unmounts anything we mounted in this mount namespace in the opposite order
1061 * that they were mounted.
1062 */
1063static int unmount_external_mounts(struct container *c)
1064{
1065 int ret = 0;
1066
1067 while (c->num_ext_mounts) {
1068 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001069 if (!c->ext_mounts[c->num_ext_mounts])
1070 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -07001071 if (umount(c->ext_mounts[c->num_ext_mounts]))
1072 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001073 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -07001074 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001075 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001076
1077 while (c->num_loopdevs) {
1078 c->num_loopdevs--;
1079 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
1080 ret = -errno;
1081 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
1082 }
1083 FREE_AND_NULL(c->loopdevs);
1084
Mike Frysinger05e594e2017-01-10 02:11:08 -05001085 while (c->num_device_mappers) {
1086 c->num_device_mappers--;
1087 if (dm_detach(c->device_mappers[c->num_device_mappers]))
1088 ret = -errno;
1089 FREE_AND_NULL(c->device_mappers[c->num_device_mappers]);
1090 }
1091 FREE_AND_NULL(c->device_mappers);
1092
Dylan Reide040c6b2016-05-02 18:49:02 -07001093 return ret;
1094}
1095
/*
 * Match mount_one in minijail, mount one mountpoint with
 * consideration for combination of MS_BIND/MS_RDONLY flag.
 *
 * Returns 0 on success and -1 (with errno set by mount()) on failure.
 */
static int mount_external(const char *src, const char *dest, const char *type,
			  unsigned long flags, const void *data)
{
	/*
	 * 'bind' and 'ro' can't both be specified in the original bind
	 * mount, so a read-only bind mount is done in two steps: a plain
	 * bind mount followed by a read-only remount.
	 */
	const int ro_bind = (flags & MS_BIND) && (flags & MS_RDONLY);
	const unsigned long initial_flags =
		ro_bind ? (flags & ~MS_RDONLY) : flags;

	if (mount(src, dest, type, initial_flags, data) == -1)
		return -1;

	if (!ro_bind)
		return 0;

	/* Second step: apply the caller's full flag set as a remount. */
	if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1)
		return -1;

	return 0;
}
1126
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001127static int do_container_mount(struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001128 const struct container_config *config,
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001129 const struct container_mount *mnt)
1130{
Mike Frysinger05e594e2017-01-10 02:11:08 -05001131 char *dm_source = NULL;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001132 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001133 char *source = NULL;
1134 char *dest = NULL;
1135 int rc = 0;
1136
1137 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
1138 return -errno;
1139
1140 /*
1141 * If it's a bind mount relative to rootfs, append source to
1142 * rootfs path, otherwise source path is absolute.
1143 */
1144 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
1145 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
1146 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001147 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
1148 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
1149 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001150 } else {
1151 if (asprintf(&source, "%s", mnt->source) < 0)
1152 goto error_free_return;
1153 }
1154
Dylan Reidbd5234c2017-06-06 21:20:07 -07001155 // Only create the destinations for external mounts, minijail will take
1156 // care of those mounted in the new namespace.
1157 if (mnt->create && !mnt->mount_in_ns) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001158 rc = setup_mount_destination(config, mnt, source, dest);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001159 if (rc)
1160 goto error_free_return;
1161 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001162 if (mnt->loopback) {
1163 /* Record this loopback file for cleanup later. */
1164 loop_source = source;
1165 source = NULL;
1166 rc = loopdev_setup(&source, loop_source);
1167 if (rc)
1168 goto error_free_return;
1169
Mike Frysinger05e594e2017-01-10 02:11:08 -05001170 /* Save this to cleanup when shutting down. */
Mike Frysinger412dbd22017-01-06 01:50:34 -05001171 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
1172 if (rc)
1173 goto error_free_return;
1174 c->num_loopdevs++;
1175 }
Mike Frysinger05e594e2017-01-10 02:11:08 -05001176 if (mnt->verity) {
1177 /* Set this device up via dm-verity. */
1178 char *dm_name;
1179 dm_source = source;
1180 source = NULL;
1181 rc = dm_setup(&source, &dm_name, dm_source, mnt->verity);
1182 if (rc)
1183 goto error_free_return;
1184
1185 /* Save this to cleanup when shutting down. */
1186 rc = strdup_and_free(&c->device_mappers[c->num_device_mappers],
1187 dm_name);
1188 free(dm_name);
1189 if (rc)
1190 goto error_free_return;
1191 c->num_device_mappers++;
1192 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001193 if (mnt->mount_in_ns) {
1194 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -07001195 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
1196 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001197 if (rc)
1198 goto error_free_return;
1199 } else {
1200 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +09001201 if (mount_external(source, dest, mnt->type, mnt->flags,
1202 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001203 goto error_free_return;
1204 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001205 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
1206 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001207 goto error_free_return;
1208 c->num_ext_mounts++;
1209 }
1210
1211 goto exit;
1212
1213error_free_return:
1214 if (!rc)
1215 rc = -errno;
1216exit:
Mike Frysinger05e594e2017-01-10 02:11:08 -05001217 free(dm_source);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001218 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001219 free(source);
1220 free(dest);
1221 return rc;
1222}
1223
Dylan Reide040c6b2016-05-02 18:49:02 -07001224static int do_container_mounts(struct container *c,
1225 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -07001226{
1227 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001228 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -07001229
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001230 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -07001231 /*
1232 * Allocate space to track anything we mount in our mount namespace.
1233 * This over-allocates as it has space for all mounts.
1234 */
1235 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
1236 if (!c->ext_mounts)
1237 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001238 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
1239 if (!c->loopdevs)
1240 return -errno;
Mike Frysinger05e594e2017-01-10 02:11:08 -05001241 c->device_mappers = calloc(config->num_mounts, sizeof(*c->device_mappers));
1242 if (!c->device_mappers)
1243 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -07001244
1245 for (i = 0; i < config->num_mounts; ++i) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001246 rc = do_container_mount(c, config, &config->mounts[i]);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001247 if (rc)
1248 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -07001249 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001250
Dylan Reid7daf9982016-04-28 16:55:42 -07001251 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -07001252
1253error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -07001254 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001255 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -07001256}
1257
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001258static int container_create_device(const struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001259 const struct container_config *config,
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001260 const struct container_device *dev,
1261 int minor)
1262{
1263 char *path = NULL;
1264 int rc = 0;
1265 int mode;
Stephen Barber1a398c72017-01-23 12:39:44 -08001266 int uid_userns, gid_userns;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001267
1268 switch (dev->type) {
1269 case 'b':
1270 mode = S_IFBLK;
1271 break;
1272 case 'c':
1273 mode = S_IFCHR;
1274 break;
1275 default:
1276 return -EINVAL;
1277 }
1278 mode |= dev->fs_permissions;
1279
Stephen Barber1a398c72017-01-23 12:39:44 -08001280 uid_userns = get_userns_outside_id(config->uid_map, dev->uid);
1281 if (uid_userns < 0)
1282 return uid_userns;
1283 gid_userns = get_userns_outside_id(config->gid_map, dev->gid);
1284 if (gid_userns < 0)
1285 return gid_userns;
1286
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001287 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
1288 goto error_free_return;
1289 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
1290 goto error_free_return;
Stephen Barber1a398c72017-01-23 12:39:44 -08001291 if (chown(path, uid_userns, gid_userns))
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001292 goto error_free_return;
1293 if (chmod(path, dev->fs_permissions))
1294 goto error_free_return;
1295
1296 goto exit;
1297
1298error_free_return:
1299 rc = -errno;
1300exit:
1301 free(path);
1302 return rc;
1303}
1304
Stephen Barber1a398c72017-01-23 12:39:44 -08001305
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001306static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -08001307{
Dylan Reidb3621832016-03-24 10:24:57 -07001308 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -07001309 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001310 char *runfs_template = NULL;
Stephen Barber1a398c72017-01-23 12:39:44 -08001311 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -08001312
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001313 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
1314 return -ENOMEM;
1315
1316 c->runfs = mkdtemp(runfs_template);
1317 if (!c->runfs) {
1318 free(runfs_template);
1319 return -errno;
1320 }
1321
Stephen Barber1a398c72017-01-23 12:39:44 -08001322 uid_userns = get_userns_outside_id(config->uid_map, config->uid);
1323 if (uid_userns < 0)
1324 return uid_userns;
1325 gid_userns = get_userns_outside_id(config->gid_map, config->gid);
1326 if (gid_userns < 0)
1327 return gid_userns;
1328
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001329 /* Make sure the container uid can access the rootfs. */
1330 if (chmod(c->runfs, 0700))
1331 return -errno;
Stephen Barber1a398c72017-01-23 12:39:44 -08001332 if (chown(c->runfs, uid_userns, gid_userns))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001333 return -errno;
1334
1335 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
1336 return -errno;
1337
1338 if (mkdir(c->runfsroot, root_dir_mode))
1339 return -errno;
1340 if (chmod(c->runfsroot, root_dir_mode))
1341 return -errno;
1342
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001343 if (mount(rootfs, c->runfsroot, "", MS_BIND, NULL))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001344 return -errno;
1345
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001346 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1347 * second call to mount() to actually set them.
1348 */
1349 if (config->rootfs_mount_flags &&
1350 mount(rootfs, c->runfsroot, "",
1351 config->rootfs_mount_flags, NULL)) {
1352 return -errno;
1353 }
1354
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001355 return 0;
1356}
1357
Dylan Reidacedff92017-03-31 17:41:40 -07001358static int device_setup(struct container *c,
1359 const struct container_config *config)
1360{
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001361 int rc;
1362 size_t i;
Dylan Reidacedff92017-03-31 17:41:40 -07001363
1364 c->cgroup->ops->deny_all_devices(c->cgroup);
1365
Dylan Reid4843d6b2017-03-31 18:14:30 -07001366 for (i = 0; i < config->num_cgroup_devices; i++) {
1367 const struct container_cgroup_device *dev =
1368 &config->cgroup_devices[i];
1369 rc = c->cgroup->ops->add_device(c->cgroup,
1370 dev->allow,
1371 dev->major,
1372 dev->minor,
1373 dev->read,
1374 dev->write,
1375 dev->modify,
1376 dev->type);
1377 if (rc)
1378 return rc;
1379 }
1380
Dylan Reidacedff92017-03-31 17:41:40 -07001381 for (i = 0; i < config->num_devices; i++) {
1382 const struct container_device *dev = &config->devices[i];
1383 int minor = dev->minor;
1384
1385 if (dev->copy_minor) {
1386 struct stat st_buff;
1387 if (stat(dev->path, &st_buff) < 0)
1388 continue;
1389 minor = minor(st_buff.st_rdev);
1390 }
1391 if (minor >= 0) {
1392 rc = container_create_device(c, config, dev, minor);
1393 if (rc)
1394 return rc;
1395 }
Dylan Reidacedff92017-03-31 17:41:40 -07001396 }
1397
1398 for (i = 0; i < c->num_loopdevs; ++i) {
1399 struct stat st;
1400
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001401 rc = stat(c->loopdevs[i], &st);
1402 if (rc < 0)
1403 return -errno;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001404 rc = c->cgroup->ops->add_device(c->cgroup, 1, major(st.st_rdev),
Dylan Reidacedff92017-03-31 17:41:40 -07001405 minor(st.st_rdev),
1406 1, 0, 0, 'b');
1407 if (rc)
1408 return rc;
1409 }
1410
1411 return 0;
1412}
1413
/*
 * Minijail hook: write the string passed as |payload| to the calling
 * thread's /proc/self/task/<tid>/attr/exec file.
 *
 * Returns 0 on success or a negative errno value on failure.  A short write
 * is reported as -EIO.
 */
static int setexeccon(void *payload)
{
	char *init_domain = (char *) payload;
	char exec_path[PATH_MAX];
	pid_t tid = syscall(SYS_gettid);
	size_t domain_len;
	ssize_t written;
	int path_len;
	int fd;
	int rc = 0;

	if (tid == -1) {
		return -errno;
	}

	path_len = snprintf(exec_path, sizeof(exec_path),
			    "/proc/self/task/%d/attr/exec", tid);
	if (path_len < 0) {
		return -errno;
	}
	if ((size_t) path_len >= sizeof(exec_path)) {
		return -ENAMETOOLONG;
	}

	fd = open(exec_path, O_WRONLY|O_CLOEXEC);
	if (fd == -1) {
		return -errno;
	}

	domain_len = strlen(init_domain);
	written = write(fd, init_domain, domain_len);
	if (written < 0) {
		rc = -errno;
	} else if ((size_t) written != domain_len) {
		/* errno is not set for a short write; report a real error. */
		rc = -EIO;
	}

	/* Close on every path so the descriptor is not leaked on failure. */
	close(fd);
	return rc;
}
1443
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001444int container_start(struct container *c, const struct container_config *config)
1445{
1446 int rc = 0;
1447 unsigned int i;
Stephen Barber1a398c72017-01-23 12:39:44 -08001448 int cgroup_uid, cgroup_gid;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001449 char **destinations;
1450 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001451
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001452 if (!c)
1453 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001454 if (!config)
1455 return -EINVAL;
1456 if (!config->program_argv || !config->program_argv[0])
1457 return -EINVAL;
1458
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001459 if (config->config_root) {
1460 c->config_root = strdup(config->config_root);
1461 if (!c->config_root) {
1462 rc = -ENOMEM;
1463 goto error_rmdir;
1464 }
1465 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001466 if (config->premounted_runfs) {
1467 c->runfs = NULL;
1468 c->runfsroot = strdup(config->premounted_runfs);
1469 if (!c->runfsroot) {
1470 rc = -ENOMEM;
1471 goto error_rmdir;
1472 }
1473 } else {
1474 rc = mount_runfs(c, config);
1475 if (rc)
1476 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001477 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001478
1479 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001480 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001481 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001482
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001483 rc = do_container_mounts(c, config);
1484 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001485 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001486
Stephen Barber1a398c72017-01-23 12:39:44 -08001487 cgroup_uid = get_userns_outside_id(config->uid_map,
1488 config->cgroup_owner);
1489 if (cgroup_uid < 0) {
1490 rc = cgroup_uid;
1491 goto error_rmdir;
1492 }
1493 cgroup_gid = get_userns_outside_id(config->gid_map,
1494 config->cgroup_group);
1495 if (cgroup_gid < 0) {
1496 rc = cgroup_gid;
1497 goto error_rmdir;
1498 }
1499
Dylan Reida9966422016-07-21 10:11:34 -07001500 c->cgroup = container_cgroup_new(c->name,
1501 "/sys/fs/cgroup",
1502 config->cgroup_parent,
Stephen Barber1a398c72017-01-23 12:39:44 -08001503 cgroup_uid,
1504 cgroup_gid);
Dylan Reida9966422016-07-21 10:11:34 -07001505 if (!c->cgroup)
1506 goto error_rmdir;
1507
Keshav Santhanam268fa032016-07-14 09:59:24 -07001508 /* Must be root to modify device cgroup or mknod */
1509 if (getuid() == 0) {
Dylan Reidacedff92017-03-31 17:41:40 -07001510 if (device_setup(c, config))
1511 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001512 }
1513
Dylan Reidd7229582016-04-27 17:08:40 -07001514 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001515 destinations = calloc(config->num_mounts, sizeof(char *));
1516 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001517 for (i = 0; i < config->num_mounts; i++) {
1518 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001519 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001520
1521 if (mnt->mount_in_ns)
1522 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001523 if (mnt->flags & MS_RDONLY)
1524 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001525
Yusuke Satod33db432016-12-05 16:24:37 -08001526 /* A hack to avoid setfiles on /data and /cache. */
1527 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001528 continue;
1529
1530 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1531 size_t j;
1532 for (j = 0; j < num_destinations; ++j) {
1533 free(destinations[j]);
1534 }
1535 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001536 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001537 }
1538
1539 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001540 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001541 if (num_destinations) {
1542 size_t i;
1543 rc = run_setfiles_command(c, config, destinations, num_destinations);
1544 for (i = 0; i < num_destinations; ++i) {
1545 free(destinations[i]);
1546 }
1547 }
1548 free(destinations);
1549 if (rc)
1550 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001551
Chinyue Chenfac909e2016-06-24 14:17:42 +08001552 /* Setup CPU cgroup params. */
1553 if (config->cpu_cgparams.shares) {
1554 rc = c->cgroup->ops->set_cpu_shares(
1555 c->cgroup, config->cpu_cgparams.shares);
1556 if (rc)
1557 goto error_rmdir;
1558 }
1559 if (config->cpu_cgparams.period) {
1560 rc = c->cgroup->ops->set_cpu_quota(
1561 c->cgroup, config->cpu_cgparams.quota);
1562 if (rc)
1563 goto error_rmdir;
1564 rc = c->cgroup->ops->set_cpu_period(
1565 c->cgroup, config->cpu_cgparams.period);
1566 if (rc)
1567 goto error_rmdir;
1568 }
1569 if (config->cpu_cgparams.rt_period) {
1570 rc = c->cgroup->ops->set_cpu_rt_runtime(
1571 c->cgroup, config->cpu_cgparams.rt_runtime);
1572 if (rc)
1573 goto error_rmdir;
1574 rc = c->cgroup->ops->set_cpu_rt_period(
1575 c->cgroup, config->cpu_cgparams.rt_period);
1576 if (rc)
1577 goto error_rmdir;
1578 }
1579
Dylan Reid837c74a2016-01-22 17:25:21 -08001580 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001581 if (config->pid_file_path) {
1582 c->pid_file_path = strdup(config->pid_file_path);
1583 if (!c->pid_file_path) {
1584 rc = -ENOMEM;
1585 goto error_rmdir;
1586 }
1587 } else if (c->runfs) {
1588 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1589 rc = -ENOMEM;
1590 goto error_rmdir;
1591 }
1592 }
1593
1594 if (c->pid_file_path)
1595 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001596 minijail_reset_signal_mask(c->jail);
1597
1598 /* Setup container namespaces. */
1599 minijail_namespace_ipc(c->jail);
1600 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001601 if (!config->share_host_netns)
1602 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001603 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001604 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001605 if (getuid() != 0)
1606 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001607 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001608 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001609 if (rc)
1610 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001611 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001612 if (rc)
1613 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001614
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001615 /* Set the UID/GID inside the container if not 0. */
Stephen Barber1a398c72017-01-23 12:39:44 -08001616 if (get_userns_outside_id(config->uid_map, config->uid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001617 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001618 else if (config->uid > 0)
1619 minijail_change_uid(c->jail, config->uid);
1620 if (get_userns_outside_id(config->gid_map, config->gid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001621 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001622 else if (config->gid > 0)
1623 minijail_change_gid(c->jail, config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001624
Dylan Reid837c74a2016-01-22 17:25:21 -08001625 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1626 if (rc)
1627 goto error_rmdir;
1628
1629 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001630 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1631 if (c->cgroup->cgroup_tasks_paths[i]) {
1632 rc = minijail_add_to_cgroup(c->jail,
1633 c->cgroup->cgroup_tasks_paths[i]);
1634 if (rc)
1635 goto error_rmdir;
1636 }
1637 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001638
Dylan Reide040c6b2016-05-02 18:49:02 -07001639 if (config->alt_syscall_table)
1640 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001641
Dylan Reid93fa4602017-06-06 13:39:31 -07001642 for (i = 0; i < config->num_rlimits; i++) {
1643 const struct container_rlimit *lim = &config->rlimits[i];
1644 rc = minijail_rlimit(c->jail, lim->type, lim->cur,
1645 lim->max);
1646 if (rc)
1647 goto error_rmdir;
1648 }
1649
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001650 if (config->selinux_context) {
1651 rc = minijail_add_hook(c->jail, &setexeccon,
1652 config->selinux_context,
1653 MINIJAIL_HOOK_EVENT_PRE_EXECVE);
1654 if (rc)
1655 goto error_rmdir;
1656 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001657
Dylan Reid3da683b2016-04-05 03:35:35 -07001658 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1659 minijail_skip_remount_private(c->jail);
1660
Dylan Reidc4335842016-11-11 10:24:52 -08001661 if (!config->keep_fds_open)
1662 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001663
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001664 if (config->use_capmask) {
1665 minijail_use_caps(c->jail, config->capmask);
1666 if (config->use_capmask_ambient) {
1667 minijail_set_ambient_caps(c->jail);
1668 }
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001669 if (config->securebits_skip_mask) {
1670 minijail_skip_setting_securebits(c->jail,
1671 config->securebits_skip_mask);
1672 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001673 }
1674
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001675 if (!config->do_init)
1676 minijail_run_as_init(c->jail);
1677
Dylan Reid837c74a2016-01-22 17:25:21 -08001678 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001679 config->program_argv[0],
1680 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001681 &c->init_pid, NULL, NULL,
1682 NULL);
1683 if (rc)
1684 goto error_rmdir;
1685 return 0;
1686
1687error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001688 if (!rc)
1689 rc = -errno;
1690 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001691 return rc;
1692}
1693
1694const char *container_root(struct container *c)
1695{
1696 return c->runfs;
1697}
1698
1699int container_pid(struct container *c)
1700{
1701 return c->init_pid;
1702}
1703
1704static int container_teardown(struct container *c)
1705{
Dylan Reid837c74a2016-01-22 17:25:21 -08001706 int ret = 0;
1707
Dylan Reide040c6b2016-05-02 18:49:02 -07001708 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001709 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001710 if (umount(c->runfsroot))
1711 ret = -errno;
1712 if (rmdir(c->runfsroot))
1713 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001714 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001715 }
1716 if (c->pid_file_path) {
1717 if (unlink(c->pid_file_path))
1718 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001719 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001720 }
1721 if (c->runfs) {
1722 if (rmdir(c->runfs))
1723 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001724 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001725 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001726 return ret;
1727}
1728
1729int container_wait(struct container *c)
1730{
Dylan Reidcf745c52016-04-22 10:18:03 -07001731 int rc;
1732
1733 do {
1734 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001735 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001736
Luis Hector Chavez945af482016-06-03 08:39:34 -07001737 // If the process had already been reaped, still perform teardown.
1738 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001739 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001740 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001741 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001742}
1743
1744int container_kill(struct container *c)
1745{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001746 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001747 return -errno;
1748 return container_wait(c);
1749}