blob: f96a5ae3c0db7621cc9a76436f6e97e6c019dd76 [file] [log] [blame]
Dylan Reid837c74a2016-01-22 17:25:21 -08001/* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
Mike Frysinger05e594e2017-01-10 02:11:08 -050010#if USE_device_mapper
11#include <libdevmapper.h>
12#endif
Dylan Reid837c74a2016-01-22 17:25:21 -080013#include <malloc.h>
14#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -070015#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
Luis Hector Chavez15e8e672017-07-20 15:13:27 -070019#include <syscall.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080020#include <sys/mount.h>
21#include <sys/stat.h>
22#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070023#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080024#include <unistd.h>
25
Mike Frysinger412dbd22017-01-06 01:50:34 -050026#include <linux/loop.h>
27
Dylan Reid837c74a2016-01-22 17:25:21 -080028#include "container_cgroup.h"
29#include "libcontainer.h"
30#include "libminijail.h"
31
/*
 * Releases |ptr| with free() and then resets it to NULL.
 *
 * |ptr| must be a modifiable pointer lvalue. It is expanded twice, so the
 * argument must not have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
37
/* Size of the argv array assembled for the setfiles command. */
#define MAX_NUM_SETFILES_ARGS 128
/* Capacity of the rlimits array; Linux defines 15 at the time of writing. */
#define MAX_RLIMITS 32

/* Path of the loop-control device node. */
static const char loopdev_ctl[] = "/dev/loop-control";
#if USE_device_mapper
/* Path prefix under which device-mapper nodes are looked up. */
static const char dm_dev_prefix[] = "/dev/mapper/";
#endif

/* Defined later in this file; declared here so earlier code can call it. */
static int container_teardown(struct container *c);
47
/*
 * Replaces the string owned by |*dest| with a newly allocated copy of |src|.
 *
 * The old value of |*dest| is released only once the copy has succeeded, so
 * |*dest| is left untouched on every failure path.
 *
 * Returns 0 on success, -EINVAL if |src| is NULL, or -ENOMEM if the copy
 * cannot be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined; reject it instead of crashing. */
	if (!src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(*dest);
	*dest = copy;
	return 0;
}
58
/*
 * One filesystem mount requested for the container.  The string members are
 * private copies made by container_config_add_mount().
 */
struct container_mount {
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;	/* Optional; NULL when not supplied. */
	char *verity;	/* Optional; NULL when not supplied. */
	int flags;
	int uid;	/* Owner used if the destination has to be created. */
	int gid;	/* Group used if the destination has to be created. */
	int mode;	/* Mode used if the destination has to be created. */
	int mount_in_ns;  /* True if mount should happen in new vfs ns */
	int create;  /* True if target should be created if it doesn't exist */
	int loopback;  /* True if target should be mounted via loopback */
};
74
/*
 * One device node to create for the container.  |path| is a private copy
 * made by container_config_add_device().
 */
struct container_device {
	char type;  /* 'c' or 'b' for char or block */
	char *path;
	int fs_permissions;
	int major;
	int minor;
	int copy_minor; /* Copy the minor from existing node, ignores |minor| */
	int uid;
	int gid;
};
85
/* One device cgroup permission entry. */
struct container_cgroup_device {
	int allow;
	char type;
	int major;  /* -1 means all */
	int minor;  /* -1 means all */
	int read;
	int write;
	int modify;
};
95
/* CPU cgroup parameters stored in the container configuration. */
struct container_cpu_cgroup {
	int shares;
	int quota;
	int period;
	int rt_runtime;
	int rt_period;
};
103
/* One resource limit entry: the limit |type| with its |cur| and |max| values. */
struct container_rlimit {
	int type;
	uint32_t cur;
	uint32_t max;
};
109
Dylan Reid837c74a2016-01-22 17:25:21 -0800110/*
111 * Structure that configures how the container is run.
112 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500113 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -0800114 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700115 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700116 * premounted_runfs - Path to where the container will be run.
117 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -0800118 * program_argv - The program to run and args, e.g. "/sbin/init".
119 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -0700120 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800121 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -0700122 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800123 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
124 * alt_syscall_table - Syscall table to use or NULL if none.
125 * mounts - Filesystems to mount in the new namespace.
126 * num_mounts - Number of above.
127 * devices - Device nodes to create.
128 * num_devices - Number of above.
Dylan Reid4843d6b2017-03-31 18:14:30 -0700129 * cgroup_devices - Device node cgroup permissions.
130 * num_cgroup_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700131 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800132 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700133 * cgroup_parent - Parent dir for cgroup creation
134 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700135 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700136 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800137 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid93fa4602017-06-06 13:39:31 -0700138 * rlimits - Array of rlimits for the contained process.
139 * num_rlimits - The number of elements in `rlimits`.
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700140 * securebits_skip_mask - The mask of securebits to skip when restricting caps.
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700141 * do_init - Whether the container needs an extra process to be run as init.
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700142 * selinux_context - The SELinux context name the container will run under.
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -0700143 * pre_start_hook - A function pointer to be called prior to calling execve(2).
144 * pre_start_hook_payload - Parameter that will be passed to pre_start_hook().
Dylan Reid837c74a2016-01-22 17:25:21 -0800145 */
146struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500147 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800148 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700149 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700150 char *premounted_runfs;
151 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800152 char **program_argv;
153 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700154 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800155 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700156 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800157 char *gid_map;
158 char *alt_syscall_table;
159 struct container_mount *mounts;
160 size_t num_mounts;
161 struct container_device *devices;
162 size_t num_devices;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700163 struct container_cgroup_device *cgroup_devices;
164 size_t num_cgroup_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700165 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800166 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700167 char *cgroup_parent;
168 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700169 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700170 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800171 int keep_fds_open;
Dylan Reid93fa4602017-06-06 13:39:31 -0700172 struct container_rlimit rlimits[MAX_RLIMITS];
173 int num_rlimits;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700174 int use_capmask;
175 int use_capmask_ambient;
176 uint64_t capmask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700177 uint64_t securebits_skip_mask;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700178 int do_init;
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700179 char *selinux_context;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -0700180 minijail_hook_t pre_start_hook;
181 void *pre_start_hook_payload;
182 int *inherited_fds;
183 size_t inherited_fd_count;
Dylan Reid837c74a2016-01-22 17:25:21 -0800184};
185
186struct container_config *container_config_create()
187{
188 return calloc(1, sizeof(struct container_config));
189}
190
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700191static void container_free_program_args(struct container_config *c)
192{
193 int i;
194
195 if (!c->program_argv)
196 return;
197 for (i = 0; i < c->num_args; ++i) {
198 FREE_AND_NULL(c->program_argv[i]);
199 }
200 FREE_AND_NULL(c->program_argv);
201}
202
203static void container_config_free_mount(struct container_mount *mount)
204{
205 FREE_AND_NULL(mount->name);
206 FREE_AND_NULL(mount->source);
207 FREE_AND_NULL(mount->destination);
208 FREE_AND_NULL(mount->type);
209 FREE_AND_NULL(mount->data);
210}
211
212static void container_config_free_device(struct container_device *device)
213{
214 FREE_AND_NULL(device->path);
215}
216
Dylan Reid837c74a2016-01-22 17:25:21 -0800217void container_config_destroy(struct container_config *c)
218{
219 size_t i;
220
221 if (c == NULL)
222 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700223 FREE_AND_NULL(c->rootfs);
224 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700225 FREE_AND_NULL(c->premounted_runfs);
226 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700227 FREE_AND_NULL(c->uid_map);
228 FREE_AND_NULL(c->gid_map);
229 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800230 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700231 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800232 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700233 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800234 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700235 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800236 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700237 FREE_AND_NULL(c->devices);
Dylan Reida34f8162017-05-10 11:33:11 -0700238 FREE_AND_NULL(c->cgroup_devices);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700239 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700240 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700241 FREE_AND_NULL(c->selinux_context);
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -0700242 FREE_AND_NULL(c->inherited_fds);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700243 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800244}
245
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500246int container_config_config_root(struct container_config *c,
247 const char *config_root)
248{
249 return strdup_and_free(&c->config_root, config_root);
250}
251
252const char *container_config_get_config_root(const struct container_config *c)
253{
254 return c->config_root;
255}
256
Dylan Reid837c74a2016-01-22 17:25:21 -0800257int container_config_rootfs(struct container_config *c, const char *rootfs)
258{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700259 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800260}
261
Dylan Reid11456722016-05-02 11:24:50 -0700262const char *container_config_get_rootfs(const struct container_config *c)
263{
264 return c->rootfs;
265}
266
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700267void container_config_rootfs_mount_flags(struct container_config *c,
268 unsigned long rootfs_mount_flags)
269{
270 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
271 * simply check against zero later. MS_BIND is also added to avoid
272 * re-mounting the original filesystem, since the rootfs is always
273 * bind-mounted.
274 */
275 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
276}
277
278unsigned long container_config_get_rootfs_mount_flags(
279 const struct container_config *c)
280{
281 return c->rootfs_mount_flags;
282}
283
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700284int container_config_premounted_runfs(struct container_config *c, const char *runfs)
285{
286 return strdup_and_free(&c->premounted_runfs, runfs);
287}
288
289const char *container_config_get_premounted_runfs(const struct container_config *c)
290{
291 return c->premounted_runfs;
292}
293
294int container_config_pid_file(struct container_config *c, const char *path)
295{
296 return strdup_and_free(&c->pid_file_path, path);
297}
298
299const char *container_config_get_pid_file(const struct container_config *c)
300{
301 return c->pid_file_path;
302}
303
Dylan Reid837c74a2016-01-22 17:25:21 -0800304int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800305 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800306{
307 size_t i;
308
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700309 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800310 c->num_args = num_args;
311 c->program_argv = calloc(num_args + 1, sizeof(char *));
312 if (!c->program_argv)
313 return -ENOMEM;
314 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700315 if (strdup_and_free(&c->program_argv[i], argv[i]))
316 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800317 }
318 c->program_argv[num_args] = NULL;
319 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700320
321error_free_return:
322 container_free_program_args(c);
323 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800324}
325
Dylan Reid11456722016-05-02 11:24:50 -0700326size_t container_config_get_num_program_args(const struct container_config *c)
327{
328 return c->num_args;
329}
330
331const char *container_config_get_program_arg(const struct container_config *c,
332 size_t index)
333{
334 if (index >= c->num_args)
335 return NULL;
336 return c->program_argv[index];
337}
338
Dylan Reid1874feb2016-06-22 17:53:50 -0700339void container_config_uid(struct container_config *c, uid_t uid)
340{
341 c->uid = uid;
342}
343
344uid_t container_config_get_uid(const struct container_config *c)
345{
346 return c->uid;
347}
348
Dylan Reid837c74a2016-01-22 17:25:21 -0800349int container_config_uid_map(struct container_config *c, const char *uid_map)
350{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700351 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800352}
353
Dylan Reid1874feb2016-06-22 17:53:50 -0700354void container_config_gid(struct container_config *c, gid_t gid)
355{
356 c->gid = gid;
357}
358
359gid_t container_config_get_gid(const struct container_config *c)
360{
361 return c->gid;
362}
363
Dylan Reid837c74a2016-01-22 17:25:21 -0800364int container_config_gid_map(struct container_config *c, const char *gid_map)
365{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700366 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800367}
368
369int container_config_alt_syscall_table(struct container_config *c,
370 const char *alt_syscall_table)
371{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700372 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800373}
374
Dylan Reid93fa4602017-06-06 13:39:31 -0700375int container_config_add_rlimit(struct container_config *c, int type,
376 uint32_t cur, uint32_t max)
377{
378 if (c->num_rlimits >= MAX_RLIMITS) {
379 return -ENOMEM;
380 }
381 c->rlimits[c->num_rlimits].type = type;
382 c->rlimits[c->num_rlimits].cur = cur;
383 c->rlimits[c->num_rlimits].max = max;
384 c->num_rlimits++;
385 return 0;
386}
387
Dylan Reid837c74a2016-01-22 17:25:21 -0800388int container_config_add_mount(struct container_config *c,
389 const char *name,
390 const char *source,
391 const char *destination,
392 const char *type,
393 const char *data,
Mike Frysinger05e594e2017-01-10 02:11:08 -0500394 const char *verity,
Dylan Reid837c74a2016-01-22 17:25:21 -0800395 int flags,
396 int uid,
397 int gid,
398 int mode,
399 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500400 int create,
401 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800402{
403 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700404 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800405
406 if (name == NULL || source == NULL ||
407 destination == NULL || type == NULL)
408 return -EINVAL;
409
410 mount_ptr = realloc(c->mounts,
411 sizeof(c->mounts[0]) * (c->num_mounts + 1));
412 if (!mount_ptr)
413 return -ENOMEM;
414 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700415 current_mount = &c->mounts[c->num_mounts];
416 memset(current_mount, 0, sizeof(struct container_mount));
417
418 if (strdup_and_free(&current_mount->name, name))
419 goto error_free_return;
420 if (strdup_and_free(&current_mount->source, source))
421 goto error_free_return;
422 if (strdup_and_free(&current_mount->destination, destination))
423 goto error_free_return;
424 if (strdup_and_free(&current_mount->type, type))
425 goto error_free_return;
426 if (data && strdup_and_free(&current_mount->data, data))
427 goto error_free_return;
Mike Frysinger05e594e2017-01-10 02:11:08 -0500428 if (verity && strdup_and_free(&current_mount->verity, verity))
429 goto error_free_return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700430 current_mount->flags = flags;
431 current_mount->uid = uid;
432 current_mount->gid = gid;
433 current_mount->mode = mode;
434 current_mount->mount_in_ns = mount_in_ns;
435 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500436 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800437 ++c->num_mounts;
438 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700439
440error_free_return:
441 container_config_free_mount(current_mount);
442 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800443}
444
Dylan Reid4843d6b2017-03-31 18:14:30 -0700445int container_config_add_cgroup_device(struct container_config *c,
446 int allow,
447 char type,
448 int major,
449 int minor,
450 int read,
451 int write,
452 int modify)
453{
454 struct container_cgroup_device *dev_ptr;
455 struct container_cgroup_device *current_dev;
456
457 dev_ptr = realloc(c->cgroup_devices,
458 sizeof(c->cgroup_devices[0]) *
459 (c->num_cgroup_devices + 1));
460 if (!dev_ptr)
461 return -ENOMEM;
462 c->cgroup_devices = dev_ptr;
463
464 current_dev = &c->cgroup_devices[c->num_cgroup_devices];
465 memset(current_dev, 0, sizeof(struct container_cgroup_device));
466 current_dev->allow = allow;
467 current_dev->type = type;
468 current_dev->major = major;
469 current_dev->minor = minor;
470 current_dev->read = read;
471 current_dev->write = write;
472 current_dev->modify = modify;
473 ++c->num_cgroup_devices;
474
475 return 0;
476}
477
Dylan Reid837c74a2016-01-22 17:25:21 -0800478int container_config_add_device(struct container_config *c,
479 char type,
480 const char *path,
481 int fs_permissions,
482 int major,
483 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700484 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800485 int uid,
486 int gid,
487 int read_allowed,
488 int write_allowed,
489 int modify_allowed)
490{
491 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700492 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800493
494 if (path == NULL)
495 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700496 /* If using a dynamic minor number, ensure that minor is -1. */
497 if (copy_minor && (minor != -1))
498 return -EINVAL;
499
Dylan Reid837c74a2016-01-22 17:25:21 -0800500 dev_ptr = realloc(c->devices,
501 sizeof(c->devices[0]) * (c->num_devices + 1));
502 if (!dev_ptr)
503 return -ENOMEM;
504 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700505 current_dev = &c->devices[c->num_devices];
506 memset(current_dev, 0, sizeof(struct container_device));
507
508 current_dev->type = type;
509 if (strdup_and_free(&current_dev->path, path))
510 goto error_free_return;
511 current_dev->fs_permissions = fs_permissions;
512 current_dev->major = major;
513 current_dev->minor = minor;
514 current_dev->copy_minor = copy_minor;
515 current_dev->uid = uid;
516 current_dev->gid = gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700517 if (read_allowed || write_allowed || modify_allowed) {
518 if (container_config_add_cgroup_device(c,
519 1,
520 type,
521 major,
522 minor,
523 read_allowed,
524 write_allowed,
525 modify_allowed))
526 goto error_free_return;
527 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800528 ++c->num_devices;
529 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700530
531error_free_return:
532 container_config_free_device(current_dev);
533 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800534}
535
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700536int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700537 const char *setfiles_cmd)
538{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700539 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700540}
Dylan Reid837c74a2016-01-22 17:25:21 -0800541
Dylan Reid11456722016-05-02 11:24:50 -0700542const char *container_config_get_run_setfiles(const struct container_config *c)
543{
544 return c->run_setfiles;
545}
546
Chinyue Chenfac909e2016-06-24 14:17:42 +0800547int container_config_set_cpu_shares(struct container_config *c, int shares)
548{
549 /* CPU shares must be 2 or higher. */
550 if (shares < 2)
551 return -EINVAL;
552
553 c->cpu_cgparams.shares = shares;
554 return 0;
555}
556
557int container_config_set_cpu_cfs_params(struct container_config *c,
558 int quota,
559 int period)
560{
561 /*
562 * quota could be set higher than period to utilize more than one CPU.
563 * quota could also be set as -1 to indicate the cgroup does not adhere
564 * to any CPU time restrictions.
565 */
566 if (quota <= 0 && quota != -1)
567 return -EINVAL;
568 if (period <= 0)
569 return -EINVAL;
570
571 c->cpu_cgparams.quota = quota;
572 c->cpu_cgparams.period = period;
573 return 0;
574}
575
576int container_config_set_cpu_rt_params(struct container_config *c,
577 int rt_runtime,
578 int rt_period)
579{
580 /*
581 * rt_runtime could be set as 0 to prevent the cgroup from using
582 * realtime CPU.
583 */
584 if (rt_runtime < 0 || rt_runtime >= rt_period)
585 return -EINVAL;
586
587 c->cpu_cgparams.rt_runtime = rt_runtime;
588 c->cpu_cgparams.rt_period = rt_period;
589 return 0;
590}
591
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800592int container_config_get_cpu_shares(struct container_config *c)
593{
594 return c->cpu_cgparams.shares;
595}
596
597int container_config_get_cpu_quota(struct container_config *c)
598{
599 return c->cpu_cgparams.quota;
600}
601
602int container_config_get_cpu_period(struct container_config *c)
603{
604 return c->cpu_cgparams.period;
605}
606
607int container_config_get_cpu_rt_runtime(struct container_config *c)
608{
609 return c->cpu_cgparams.rt_runtime;
610}
611
612int container_config_get_cpu_rt_period(struct container_config *c)
613{
614 return c->cpu_cgparams.rt_period;
615}
616
Dylan Reid9e724af2016-07-21 09:58:07 -0700617int container_config_set_cgroup_parent(struct container_config *c,
618 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700619 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700620{
621 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700622 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700623 return strdup_and_free(&c->cgroup_parent, parent);
624}
625
626const char *container_config_get_cgroup_parent(struct container_config *c)
627{
628 return c->cgroup_parent;
629}
630
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700631void container_config_share_host_netns(struct container_config *c)
632{
633 c->share_host_netns = 1;
634}
635
636int get_container_config_share_host_netns(struct container_config *c)
637{
638 return c->share_host_netns;
639}
640
Dylan Reidc4335842016-11-11 10:24:52 -0800641void container_config_keep_fds_open(struct container_config *c)
642{
643 c->keep_fds_open = 1;
644}
645
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700646void container_config_set_capmask(struct container_config *c,
647 uint64_t capmask,
648 int ambient)
649{
650 c->use_capmask = 1;
651 c->capmask = capmask;
652 c->use_capmask_ambient = ambient;
653}
654
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -0700655void container_config_set_securebits_skip_mask(struct container_config *c,
656 uint64_t securebits_skip_mask)
657{
658 c->securebits_skip_mask = securebits_skip_mask;
659}
660
Luis Hector Chavezdac65c32017-07-21 10:30:23 -0700661void container_config_set_run_as_init(struct container_config *c,
662 int run_as_init)
663{
664 c->do_init = !run_as_init;
665}
666
Luis Hector Chavez15e8e672017-07-20 15:13:27 -0700667int container_config_set_selinux_context(struct container_config *c,
668 const char *context)
669{
670 if (!context)
671 return -EINVAL;
672 c->selinux_context = strdup(context);
673 if (c->selinux_context)
674 return -ENOMEM;
675 return 0;
676}
677
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -0700678void container_config_set_pre_execve_hook(struct container_config *c,
679 int (*hook)(void*),
680 void *payload)
681{
682 c->pre_start_hook = hook;
683 c->pre_start_hook_payload = payload;
684}
685
686int container_config_inherit_fds(struct container_config *c,
687 int *inherited_fds,
688 size_t inherited_fd_count)
689{
690 if (c->inherited_fds)
691 return -EINVAL;
692 c->inherited_fds = calloc(inherited_fd_count, sizeof(int));
693 if (!c->inherited_fds)
694 return -ENOMEM;
695 memcpy(c->inherited_fds, inherited_fds,
696 inherited_fd_count * sizeof(int));
697 c->inherited_fd_count = inherited_fd_count;
698 return 0;
699}
700
/*
 * Container manipulation
 */

/* State of one container instance. */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *config_root;
	char *runfs;
	char *rundir;	/* Copy of the rundir given to container_new(). */
	char *runfsroot;
	char *pid_file_path;
	char **ext_mounts; /* Mounts made outside of the minijail */
	size_t num_ext_mounts;
	char **loopdevs;
	size_t num_loopdevs;
	char **device_mappers;
	size_t num_device_mappers;
	char *name;	/* Copy of the name given to container_new(). */
};
721
722struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700723 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800724{
725 struct container *c;
726
Dylan Reid837c74a2016-01-22 17:25:21 -0800727 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700728 if (!c)
729 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800730 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700731 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700732 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700733 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800734 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700735 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800736 return c;
737}
738
739void container_destroy(struct container *c)
740{
Dylan Reid684975e2016-05-02 15:44:47 -0700741 if (c->cgroup)
742 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700743 if (c->jail)
744 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500745 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700746 FREE_AND_NULL(c->name);
747 FREE_AND_NULL(c->rundir);
748 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800749}
750
/*
 * Given a uid/gid map of "inside1 outside1 length1, ...", and an id
 * inside of the user namespace, return the equivalent outside id, or
 * return < 0 on error.
 *
 * Each mapping covers the ids [inside, inside + length); the upper bound is
 * exclusive.  The map is parsed in place, without copying it.
 *
 * Returns -EINVAL when |map| is NULL, empty or malformed, when a field is
 * negative or does not fit in an int, or when no mapping contains |id|.
 * Returns -errno when strtol() reports a range error.
 */
static int get_userns_outside_id(const char *map, int id)
{
	const char *cursor = map;

	if (!map)
		return -EINVAL;

	while (*cursor != '\0') {
		/* Parsed in order: inside, outside, length. */
		long fields[3];
		size_t i;

		for (i = 0; i < 3; ++i) {
			char *end;

			errno = 0;
			fields[i] = strtol(cursor, &end, 10);
			if (errno)
				return -errno;
			/*
			 * end == cursor means no digits were consumed, i.e.
			 * the field is missing.  The int round-trip rejects
			 * values that do not fit in an int.
			 */
			if (end == cursor || fields[i] < 0 ||
			    (long)(int)fields[i] != fields[i])
				return -EINVAL;
			cursor = end;
		}

		/* Only whitespace may sit between a mapping and its ','. */
		while (*cursor == ' ' || *cursor == '\t' || *cursor == '\n')
			++cursor;
		if (*cursor == ',')
			++cursor;
		else if (*cursor != '\0')
			return -EINVAL;

		/*
		 * Compare the offset with the length rather than computing
		 * inside + length, which could overflow.
		 */
		if (id >= fields[0] && id - fields[0] < fields[2]) {
			long long outside_id =
				(long long)(id - fields[0]) + fields[1];

			if ((long long)(int)outside_id != outside_id)
				return -EINVAL;
			return (int)outside_id;
		}
	}

	/* No mapping contains |id|. */
	return -EINVAL;
}
793
/*
 * Creates the directory |path|, then sets its mode to |mode| and its
 * ownership to |uid|:|gid|.
 *
 * Returns 0 on success or -errno from the first step that fails; the later
 * steps are skipped in that case.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* Short-circuit evaluation keeps the order mkdir, chmod, chown. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
804
/*
 * Opens |path| read-write, creating it with |mode| if it does not exist,
 * and sets its ownership to |uid|:|gid|.
 *
 * Returns 0 on success, or -errno from open() or fchown() on failure.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int saved_errno = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;

	/*
	 * Capture errno before close(): close() may overwrite it, which
	 * would otherwise misreport the fchown() failure.
	 */
	if (fchown(fd, uid, gid))
		saved_errno = errno;
	close(fd);

	return -saved_errno;
}
818
819/* Make sure the mount target exists in the new rootfs. Create if needed and
820 * possible.
821 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800822static int setup_mount_destination(const struct container_config *config,
823 const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700824 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800825 const char *dest)
826{
Stephen Barber1a398c72017-01-23 12:39:44 -0800827 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -0800828 int rc;
829 struct stat st_buf;
830
831 rc = stat(dest, &st_buf);
832 if (rc == 0) /* destination exists */
833 return 0;
834
835 /* Try to create the destination. Either make directory or touch a file
836 * depending on the source type.
837 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800838 uid_userns = get_userns_outside_id(config->uid_map, mnt->uid);
839 if (uid_userns < 0)
840 return uid_userns;
841 gid_userns = get_userns_outside_id(config->gid_map, mnt->gid);
842 if (gid_userns < 0)
843 return gid_userns;
844
Dylan Reid2149be92016-04-28 18:38:57 -0700845 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800846 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
Stephen Barber1a398c72017-01-23 12:39:44 -0800847 return make_dir(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800848
Stephen Barber1a398c72017-01-23 12:39:44 -0800849 return touch_file(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800850}
851
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700852/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700853static int run_setfiles_command(const struct container *c,
854 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800855 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700856{
857 int rc;
858 int status;
859 int pid;
860 char *context_path;
861
Dylan Reide040c6b2016-05-02 18:49:02 -0700862 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700863 return 0;
864
865 if (asprintf(&context_path, "%s/file_contexts",
866 c->runfsroot) < 0)
867 return -errno;
868
869 pid = fork();
870 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800871 size_t i;
872 size_t arg_index = 0;
873 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700874 const char *env[] = {
875 NULL,
876 };
877
Yusuke Sato91f11f02016-12-02 16:15:13 -0800878 argv[arg_index++] = config->run_setfiles;
879 argv[arg_index++] = "-r";
880 argv[arg_index++] = c->runfsroot;
881 argv[arg_index++] = context_path;
882 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
883 _exit(-E2BIG);
884 for (i = 0; i < num_destinations; ++i) {
885 argv[arg_index++] = destinations[i];
886 }
887 argv[arg_index] = NULL;
888
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700889 execve(argv[0], (char *const*)argv, (char *const*)env);
890
891 /* Command failed to exec if execve returns. */
892 _exit(-errno);
893 }
894 free(context_path);
895 if (pid < 0)
896 return -errno;
897 do {
898 rc = waitpid(pid, &status, 0);
899 } while (rc == -1 && errno == EINTR);
900 if (rc < 0)
901 return -errno;
902 return status;
903}
904
Mike Frysinger412dbd22017-01-06 01:50:34 -0500905/* Find a free loop device and attach it. */
906static int loopdev_setup(char **loopdev_ret, const char *source)
907{
908 int ret = 0;
909 int source_fd = -1;
910 int control_fd = -1;
911 int loop_fd = -1;
912 char *loopdev = NULL;
913
914 source_fd = open(source, O_RDONLY|O_CLOEXEC);
915 if (source_fd < 0)
916 goto error;
917
918 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
919 if (control_fd < 0)
920 goto error;
921
922 while (1) {
923 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
924 if (num < 0)
925 goto error;
926
927 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
928 goto error;
929
930 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
931 if (loop_fd < 0)
932 goto error;
933
934 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
935 break;
936
937 if (errno != EBUSY)
938 goto error;
939
940 /* Clean up resources for the next pass. */
941 free(loopdev);
942 close(loop_fd);
943 }
944
945 *loopdev_ret = loopdev;
946 goto exit;
947
948error:
949 ret = -errno;
950 free(loopdev);
951exit:
952 if (source_fd != -1)
953 close(source_fd);
954 if (control_fd != -1)
955 close(control_fd);
956 if (loop_fd != -1)
957 close(loop_fd);
958 return ret;
959}
960
961/* Detach the specified loop device. */
962static int loopdev_detach(const char *loopdev)
963{
964 int ret = 0;
965 int fd;
966
967 fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
968 if (fd < 0)
969 goto error;
970 if (ioctl(fd, LOOP_CLR_FD) < 0)
971 goto error;
972
973 goto exit;
974
975error:
976 ret = -errno;
977exit:
978 if (fd != -1)
979 close(fd);
980 return ret;
981}
982
/*
 * Create a new device mapper target for the source.
 *
 * |verity_cmdline| has the form "<start> <size> <target type> <params...>";
 * every "@DEV@" token in it is replaced by |source| before it is handed to
 * libdevmapper.  The target is created read-only and is named
 * "cros-containers-<source>" with every '/' turned into '_'.
 *
 * On success returns 0 and stores two malloc'ed strings owned by the caller:
 * |*dm_path_ret| (the node under /dev/mapper/) and |*dm_name_ret| (the target
 * name).  On failure returns a negative error code and leaves both outputs
 * untouched.  When the library is built without device mapper support this
 * always fails with -ENOSYS instead of reporting success without having
 * produced any output.
 */
static int dm_setup(char **dm_path_ret, char **dm_name_ret, const char *source,
		    const char *verity_cmdline)
{
	int ret = 0;
#if USE_device_mapper
	static const char dev_token[] = "@DEV@";
	const size_t token_len = sizeof(dev_token) - 1;
	char *p;
	char *dm_path = NULL;
	char *dm_name = NULL;
	char *verity = NULL;
	char *out;
	const char *in;
	const char *hit;
	size_t source_len;
	size_t num_tokens = 0;
	struct dm_task *dmt = NULL;
	uint32_t cookie = 0;
	char ttype[20];
	unsigned long long start, size;
	int n = 0;

	/* Normalize the name into something unique-esque. */
	if (asprintf(&dm_name, "cros-containers-%s", source) < 0) {
		/* The pointer is indeterminate after a failed asprintf(). */
		dm_name = NULL;
		ret = -ENOMEM;
		goto error;
	}
	p = dm_name;
	while ((p = strchr(p, '/')) != NULL)
		*p++ = '_';

	/* Get the /dev path for the higher levels to mount. */
	if (asprintf(&dm_path, "%s%s", dm_dev_prefix, dm_name) < 0) {
		dm_path = NULL;
		ret = -ENOMEM;
		goto error;
	}

	/*
	 * Insert the source path in the verity command line.  The tokens are
	 * counted first so the buffer is large enough for any number of them,
	 * and the copy is done in a single pass over the input so a source
	 * path that itself contains the token cannot cause endless
	 * re-substitution.
	 */
	source_len = strlen(source);
	for (in = verity_cmdline; (hit = strstr(in, dev_token)) != NULL;
	     in = hit + token_len)
		num_tokens++;

	verity = malloc(strlen(verity_cmdline) + num_tokens * source_len + 1);
	if (verity == NULL) {
		ret = -ENOMEM;
		goto error;
	}

	in = verity_cmdline;
	out = verity;
	while ((hit = strstr(in, dev_token)) != NULL) {
		size_t prefix_len = (size_t)(hit - in);

		memcpy(out, in, prefix_len);
		out += prefix_len;
		memcpy(out, source, source_len);
		out += source_len;
		in = hit + token_len;
	}
	strcpy(out, in);

	/* Extract the first three parameters for dm-verity settings. */
	if (sscanf(verity, "%llu %llu %10s %n", &start, &size, ttype, &n) != 3) {
		/* A malformed command line does not set errno. */
		ret = -EINVAL;
		goto error;
	}

	/* Finally create the device mapper. */
	dmt = dm_task_create(DM_DEVICE_CREATE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_set_ro(dmt))
		goto error;

	/* Everything after the target type is passed on as target params. */
	if (!dm_task_add_target(dmt, start, size, ttype, verity + n))
		goto error;

	if (!dm_task_set_cookie(dmt, &cookie, 0))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	/* Make sure the node exists before we continue. */
	dm_udev_wait(cookie);

	*dm_path_ret = dm_path;
	*dm_name_ret = dm_name;
	goto exit;

error:
	/*
	 * The libdevmapper calls report failure through their return value
	 * only; never let a failure be reported as success just because errno
	 * happens to be 0.
	 */
	if (ret == 0)
		ret = errno ? -errno : -EIO;
	free(dm_name);
	free(dm_path);
exit:
	free(verity);
	if (dmt)
		dm_task_destroy(dmt);
#else
	(void)dm_path_ret;
	(void)dm_name_ret;
	(void)source;
	(void)verity_cmdline;
	ret = -ENOSYS;
#endif
	return ret;
}
1061
/*
 * Tear down the device mapper target called |dm_name|.
 * Returns 0 on success or a negative error code.  Without device mapper
 * support compiled in there is nothing to remove and 0 is returned.
 */
static int dm_detach(const char *dm_name)
{
	int ret = 0;
#if USE_device_mapper
	struct dm_task *dmt;

	dmt = dm_task_create(DM_DEVICE_REMOVE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	goto exit;

error:
	/*
	 * The libdevmapper calls signal failure through their return value;
	 * fall back to a generic code so a failure is never reported as 0.
	 */
	ret = errno ? -errno : -EIO;
exit:
	/* Only destroy a task that was actually created, as dm_setup() does. */
	if (dmt)
		dm_task_destroy(dmt);
#else
	(void)dm_name;
#endif
	return ret;
}
1088
Dylan Reide040c6b2016-05-02 18:49:02 -07001089/*
1090 * Unmounts anything we mounted in this mount namespace in the opposite order
1091 * that they were mounted.
1092 */
1093static int unmount_external_mounts(struct container *c)
1094{
1095 int ret = 0;
1096
1097 while (c->num_ext_mounts) {
1098 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001099 if (!c->ext_mounts[c->num_ext_mounts])
1100 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -07001101 if (umount(c->ext_mounts[c->num_ext_mounts]))
1102 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001103 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -07001104 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001105 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001106
1107 while (c->num_loopdevs) {
1108 c->num_loopdevs--;
1109 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
1110 ret = -errno;
1111 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
1112 }
1113 FREE_AND_NULL(c->loopdevs);
1114
Mike Frysinger05e594e2017-01-10 02:11:08 -05001115 while (c->num_device_mappers) {
1116 c->num_device_mappers--;
1117 if (dm_detach(c->device_mappers[c->num_device_mappers]))
1118 ret = -errno;
1119 FREE_AND_NULL(c->device_mappers[c->num_device_mappers]);
1120 }
1121 FREE_AND_NULL(c->device_mappers);
1122
Dylan Reide040c6b2016-05-02 18:49:02 -07001123 return ret;
1124}
1125
Junichi Uekawa5d272772016-07-21 16:07:19 +09001126/*
1127 * Match mount_one in minijail, mount one mountpoint with
1128 * consideration for combination of MS_BIND/MS_RDONLY flag.
1129 */
1130static int mount_external(const char *src, const char *dest, const char *type,
1131 unsigned long flags, const void *data)
1132{
1133 int remount_ro = 0;
1134
1135 /*
1136 * R/O bind mounts have to be remounted since 'bind' and 'ro'
1137 * can't both be specified in the original bind mount.
1138 * Remount R/O after the initial mount.
1139 */
1140 if ((flags & MS_BIND) && (flags & MS_RDONLY)) {
1141 remount_ro = 1;
1142 flags &= ~MS_RDONLY;
1143 }
1144
1145 if (mount(src, dest, type, flags, data) == -1)
1146 return -1;
1147
1148 if (remount_ro) {
1149 flags |= MS_RDONLY;
1150 if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1)
1151 return -1;
1152 }
1153
1154 return 0;
1155}
1156
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001157static int do_container_mount(struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001158 const struct container_config *config,
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001159 const struct container_mount *mnt)
1160{
Mike Frysinger05e594e2017-01-10 02:11:08 -05001161 char *dm_source = NULL;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001162 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001163 char *source = NULL;
1164 char *dest = NULL;
1165 int rc = 0;
1166
1167 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
1168 return -errno;
1169
1170 /*
1171 * If it's a bind mount relative to rootfs, append source to
1172 * rootfs path, otherwise source path is absolute.
1173 */
1174 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
1175 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
1176 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001177 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
1178 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
1179 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001180 } else {
1181 if (asprintf(&source, "%s", mnt->source) < 0)
1182 goto error_free_return;
1183 }
1184
Dylan Reidbd5234c2017-06-06 21:20:07 -07001185 // Only create the destinations for external mounts, minijail will take
1186 // care of those mounted in the new namespace.
1187 if (mnt->create && !mnt->mount_in_ns) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001188 rc = setup_mount_destination(config, mnt, source, dest);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001189 if (rc)
1190 goto error_free_return;
1191 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001192 if (mnt->loopback) {
1193 /* Record this loopback file for cleanup later. */
1194 loop_source = source;
1195 source = NULL;
1196 rc = loopdev_setup(&source, loop_source);
1197 if (rc)
1198 goto error_free_return;
1199
Mike Frysinger05e594e2017-01-10 02:11:08 -05001200 /* Save this to cleanup when shutting down. */
Mike Frysinger412dbd22017-01-06 01:50:34 -05001201 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
1202 if (rc)
1203 goto error_free_return;
1204 c->num_loopdevs++;
1205 }
Mike Frysinger05e594e2017-01-10 02:11:08 -05001206 if (mnt->verity) {
1207 /* Set this device up via dm-verity. */
1208 char *dm_name;
1209 dm_source = source;
1210 source = NULL;
1211 rc = dm_setup(&source, &dm_name, dm_source, mnt->verity);
1212 if (rc)
1213 goto error_free_return;
1214
1215 /* Save this to cleanup when shutting down. */
1216 rc = strdup_and_free(&c->device_mappers[c->num_device_mappers],
1217 dm_name);
1218 free(dm_name);
1219 if (rc)
1220 goto error_free_return;
1221 c->num_device_mappers++;
1222 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001223 if (mnt->mount_in_ns) {
1224 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -07001225 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
1226 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001227 if (rc)
1228 goto error_free_return;
1229 } else {
1230 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +09001231 if (mount_external(source, dest, mnt->type, mnt->flags,
1232 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001233 goto error_free_return;
1234 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001235 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
1236 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001237 goto error_free_return;
1238 c->num_ext_mounts++;
1239 }
1240
1241 goto exit;
1242
1243error_free_return:
1244 if (!rc)
1245 rc = -errno;
1246exit:
Mike Frysinger05e594e2017-01-10 02:11:08 -05001247 free(dm_source);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001248 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001249 free(source);
1250 free(dest);
1251 return rc;
1252}
1253
Dylan Reide040c6b2016-05-02 18:49:02 -07001254static int do_container_mounts(struct container *c,
1255 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -07001256{
1257 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001258 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -07001259
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001260 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -07001261 /*
1262 * Allocate space to track anything we mount in our mount namespace.
1263 * This over-allocates as it has space for all mounts.
1264 */
1265 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
1266 if (!c->ext_mounts)
1267 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001268 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
1269 if (!c->loopdevs)
1270 return -errno;
Mike Frysinger05e594e2017-01-10 02:11:08 -05001271 c->device_mappers = calloc(config->num_mounts, sizeof(*c->device_mappers));
1272 if (!c->device_mappers)
1273 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -07001274
1275 for (i = 0; i < config->num_mounts; ++i) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001276 rc = do_container_mount(c, config, &config->mounts[i]);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001277 if (rc)
1278 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -07001279 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001280
Dylan Reid7daf9982016-04-28 16:55:42 -07001281 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -07001282
1283error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -07001284 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001285 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -07001286}
1287
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001288static int container_create_device(const struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001289 const struct container_config *config,
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001290 const struct container_device *dev,
1291 int minor)
1292{
1293 char *path = NULL;
1294 int rc = 0;
1295 int mode;
Stephen Barber1a398c72017-01-23 12:39:44 -08001296 int uid_userns, gid_userns;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001297
1298 switch (dev->type) {
1299 case 'b':
1300 mode = S_IFBLK;
1301 break;
1302 case 'c':
1303 mode = S_IFCHR;
1304 break;
1305 default:
1306 return -EINVAL;
1307 }
1308 mode |= dev->fs_permissions;
1309
Stephen Barber1a398c72017-01-23 12:39:44 -08001310 uid_userns = get_userns_outside_id(config->uid_map, dev->uid);
1311 if (uid_userns < 0)
1312 return uid_userns;
1313 gid_userns = get_userns_outside_id(config->gid_map, dev->gid);
1314 if (gid_userns < 0)
1315 return gid_userns;
1316
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001317 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
1318 goto error_free_return;
1319 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
1320 goto error_free_return;
Stephen Barber1a398c72017-01-23 12:39:44 -08001321 if (chown(path, uid_userns, gid_userns))
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001322 goto error_free_return;
1323 if (chmod(path, dev->fs_permissions))
1324 goto error_free_return;
1325
1326 goto exit;
1327
1328error_free_return:
1329 rc = -errno;
1330exit:
1331 free(path);
1332 return rc;
1333}
1334
Stephen Barber1a398c72017-01-23 12:39:44 -08001335
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001336static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -08001337{
Dylan Reidb3621832016-03-24 10:24:57 -07001338 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -07001339 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001340 char *runfs_template = NULL;
Stephen Barber1a398c72017-01-23 12:39:44 -08001341 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -08001342
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001343 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
1344 return -ENOMEM;
1345
1346 c->runfs = mkdtemp(runfs_template);
1347 if (!c->runfs) {
1348 free(runfs_template);
1349 return -errno;
1350 }
1351
Stephen Barber1a398c72017-01-23 12:39:44 -08001352 uid_userns = get_userns_outside_id(config->uid_map, config->uid);
1353 if (uid_userns < 0)
1354 return uid_userns;
1355 gid_userns = get_userns_outside_id(config->gid_map, config->gid);
1356 if (gid_userns < 0)
1357 return gid_userns;
1358
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001359 /* Make sure the container uid can access the rootfs. */
1360 if (chmod(c->runfs, 0700))
1361 return -errno;
Stephen Barber1a398c72017-01-23 12:39:44 -08001362 if (chown(c->runfs, uid_userns, gid_userns))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001363 return -errno;
1364
1365 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
1366 return -errno;
1367
1368 if (mkdir(c->runfsroot, root_dir_mode))
1369 return -errno;
1370 if (chmod(c->runfsroot, root_dir_mode))
1371 return -errno;
1372
Lev Rumyantsevcc625e62017-07-21 11:25:12 -07001373 if (mount(rootfs, c->runfsroot, "",
1374 MS_BIND | (config->rootfs_mount_flags & MS_REC), NULL)) {
1375 return -errno;
1376 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001377
Lev Rumyantsevcc625e62017-07-21 11:25:12 -07001378 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1379 * second call to mount() to actually set them.
1380 */
1381 if (config->rootfs_mount_flags &&
1382 mount(rootfs, c->runfsroot, "",
1383 (config->rootfs_mount_flags & ~MS_REC), NULL)) {
1384 return -errno;
1385 }
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001386
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001387 return 0;
1388}
1389
Dylan Reidacedff92017-03-31 17:41:40 -07001390static int device_setup(struct container *c,
1391 const struct container_config *config)
1392{
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001393 int rc;
1394 size_t i;
Dylan Reidacedff92017-03-31 17:41:40 -07001395
1396 c->cgroup->ops->deny_all_devices(c->cgroup);
1397
Dylan Reid4843d6b2017-03-31 18:14:30 -07001398 for (i = 0; i < config->num_cgroup_devices; i++) {
1399 const struct container_cgroup_device *dev =
1400 &config->cgroup_devices[i];
1401 rc = c->cgroup->ops->add_device(c->cgroup,
1402 dev->allow,
1403 dev->major,
1404 dev->minor,
1405 dev->read,
1406 dev->write,
1407 dev->modify,
1408 dev->type);
1409 if (rc)
1410 return rc;
1411 }
1412
Dylan Reidacedff92017-03-31 17:41:40 -07001413 for (i = 0; i < config->num_devices; i++) {
1414 const struct container_device *dev = &config->devices[i];
1415 int minor = dev->minor;
1416
1417 if (dev->copy_minor) {
1418 struct stat st_buff;
1419 if (stat(dev->path, &st_buff) < 0)
1420 continue;
1421 minor = minor(st_buff.st_rdev);
1422 }
1423 if (minor >= 0) {
1424 rc = container_create_device(c, config, dev, minor);
1425 if (rc)
1426 return rc;
1427 }
Dylan Reidacedff92017-03-31 17:41:40 -07001428 }
1429
1430 for (i = 0; i < c->num_loopdevs; ++i) {
1431 struct stat st;
1432
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001433 rc = stat(c->loopdevs[i], &st);
1434 if (rc < 0)
1435 return -errno;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001436 rc = c->cgroup->ops->add_device(c->cgroup, 1, major(st.st_rdev),
Dylan Reidacedff92017-03-31 17:41:40 -07001437 minor(st.st_rdev),
1438 1, 0, 0, 'b');
1439 if (rc)
1440 return rc;
1441 }
1442
1443 return 0;
1444}
1445
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001446static int setexeccon(void *payload)
1447{
1448 char *init_domain = (char *) payload;
1449 char exec_path[PATH_MAX];
1450 pid_t tid = syscall(SYS_gettid);
1451 int fd;
1452
1453 if (tid == -1) {
1454 return -errno;
1455 }
1456
1457 if (snprintf(exec_path, sizeof(exec_path),
1458 "/proc/self/task/%d/attr/exec", tid) < 0) {
1459 return -errno;
1460 }
1461
1462 fd = open(exec_path, O_WRONLY|O_CLOEXEC);
1463 if (fd == -1) {
1464 return -errno;
1465 }
1466
1467 if (write(fd, init_domain, strlen(init_domain)) !=
1468 (ssize_t) strlen(init_domain)) {
1469 return -errno;
1470 }
1471
1472 close(fd);
1473 return 0;
1474}
1475
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001476int container_start(struct container *c, const struct container_config *config)
1477{
1478 int rc = 0;
1479 unsigned int i;
Stephen Barber1a398c72017-01-23 12:39:44 -08001480 int cgroup_uid, cgroup_gid;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001481 char **destinations;
1482 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001483
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001484 if (!c)
1485 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001486 if (!config)
1487 return -EINVAL;
1488 if (!config->program_argv || !config->program_argv[0])
1489 return -EINVAL;
1490
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001491 if (config->config_root) {
1492 c->config_root = strdup(config->config_root);
1493 if (!c->config_root) {
1494 rc = -ENOMEM;
1495 goto error_rmdir;
1496 }
1497 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001498 if (config->premounted_runfs) {
1499 c->runfs = NULL;
1500 c->runfsroot = strdup(config->premounted_runfs);
1501 if (!c->runfsroot) {
1502 rc = -ENOMEM;
1503 goto error_rmdir;
1504 }
1505 } else {
1506 rc = mount_runfs(c, config);
1507 if (rc)
1508 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001509 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001510
1511 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001512 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001513 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001514
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001515 rc = do_container_mounts(c, config);
1516 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001517 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001518
Stephen Barber1a398c72017-01-23 12:39:44 -08001519 cgroup_uid = get_userns_outside_id(config->uid_map,
1520 config->cgroup_owner);
1521 if (cgroup_uid < 0) {
1522 rc = cgroup_uid;
1523 goto error_rmdir;
1524 }
1525 cgroup_gid = get_userns_outside_id(config->gid_map,
1526 config->cgroup_group);
1527 if (cgroup_gid < 0) {
1528 rc = cgroup_gid;
1529 goto error_rmdir;
1530 }
1531
Dylan Reida9966422016-07-21 10:11:34 -07001532 c->cgroup = container_cgroup_new(c->name,
1533 "/sys/fs/cgroup",
1534 config->cgroup_parent,
Stephen Barber1a398c72017-01-23 12:39:44 -08001535 cgroup_uid,
1536 cgroup_gid);
Dylan Reida9966422016-07-21 10:11:34 -07001537 if (!c->cgroup)
1538 goto error_rmdir;
1539
Keshav Santhanam268fa032016-07-14 09:59:24 -07001540 /* Must be root to modify device cgroup or mknod */
1541 if (getuid() == 0) {
Dylan Reidacedff92017-03-31 17:41:40 -07001542 if (device_setup(c, config))
1543 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001544 }
1545
Dylan Reidd7229582016-04-27 17:08:40 -07001546 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001547 destinations = calloc(config->num_mounts, sizeof(char *));
1548 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001549 for (i = 0; i < config->num_mounts; i++) {
1550 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001551 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001552
1553 if (mnt->mount_in_ns)
1554 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001555 if (mnt->flags & MS_RDONLY)
1556 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001557
Yusuke Satod33db432016-12-05 16:24:37 -08001558 /* A hack to avoid setfiles on /data and /cache. */
1559 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001560 continue;
1561
1562 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1563 size_t j;
1564 for (j = 0; j < num_destinations; ++j) {
1565 free(destinations[j]);
1566 }
1567 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001568 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001569 }
1570
1571 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001572 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001573 if (num_destinations) {
1574 size_t i;
1575 rc = run_setfiles_command(c, config, destinations, num_destinations);
1576 for (i = 0; i < num_destinations; ++i) {
1577 free(destinations[i]);
1578 }
1579 }
1580 free(destinations);
1581 if (rc)
1582 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001583
Chinyue Chenfac909e2016-06-24 14:17:42 +08001584 /* Setup CPU cgroup params. */
1585 if (config->cpu_cgparams.shares) {
1586 rc = c->cgroup->ops->set_cpu_shares(
1587 c->cgroup, config->cpu_cgparams.shares);
1588 if (rc)
1589 goto error_rmdir;
1590 }
1591 if (config->cpu_cgparams.period) {
1592 rc = c->cgroup->ops->set_cpu_quota(
1593 c->cgroup, config->cpu_cgparams.quota);
1594 if (rc)
1595 goto error_rmdir;
1596 rc = c->cgroup->ops->set_cpu_period(
1597 c->cgroup, config->cpu_cgparams.period);
1598 if (rc)
1599 goto error_rmdir;
1600 }
1601 if (config->cpu_cgparams.rt_period) {
1602 rc = c->cgroup->ops->set_cpu_rt_runtime(
1603 c->cgroup, config->cpu_cgparams.rt_runtime);
1604 if (rc)
1605 goto error_rmdir;
1606 rc = c->cgroup->ops->set_cpu_rt_period(
1607 c->cgroup, config->cpu_cgparams.rt_period);
1608 if (rc)
1609 goto error_rmdir;
1610 }
1611
Dylan Reid837c74a2016-01-22 17:25:21 -08001612 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001613 if (config->pid_file_path) {
1614 c->pid_file_path = strdup(config->pid_file_path);
1615 if (!c->pid_file_path) {
1616 rc = -ENOMEM;
1617 goto error_rmdir;
1618 }
1619 } else if (c->runfs) {
1620 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1621 rc = -ENOMEM;
1622 goto error_rmdir;
1623 }
1624 }
1625
1626 if (c->pid_file_path)
1627 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001628 minijail_reset_signal_mask(c->jail);
1629
1630 /* Setup container namespaces. */
1631 minijail_namespace_ipc(c->jail);
1632 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001633 if (!config->share_host_netns)
1634 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001635 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001636 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001637 if (getuid() != 0)
1638 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001639 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001640 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001641 if (rc)
1642 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001643 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001644 if (rc)
1645 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001646
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001647 /* Set the UID/GID inside the container if not 0. */
Stephen Barber1a398c72017-01-23 12:39:44 -08001648 if (get_userns_outside_id(config->uid_map, config->uid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001649 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001650 else if (config->uid > 0)
1651 minijail_change_uid(c->jail, config->uid);
1652 if (get_userns_outside_id(config->gid_map, config->gid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001653 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001654 else if (config->gid > 0)
1655 minijail_change_gid(c->jail, config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001656
Dylan Reid837c74a2016-01-22 17:25:21 -08001657 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1658 if (rc)
1659 goto error_rmdir;
1660
1661 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001662 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1663 if (c->cgroup->cgroup_tasks_paths[i]) {
1664 rc = minijail_add_to_cgroup(c->jail,
1665 c->cgroup->cgroup_tasks_paths[i]);
1666 if (rc)
1667 goto error_rmdir;
1668 }
1669 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001670
Dylan Reide040c6b2016-05-02 18:49:02 -07001671 if (config->alt_syscall_table)
1672 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001673
Dylan Reid93fa4602017-06-06 13:39:31 -07001674 for (i = 0; i < config->num_rlimits; i++) {
1675 const struct container_rlimit *lim = &config->rlimits[i];
1676 rc = minijail_rlimit(c->jail, lim->type, lim->cur,
1677 lim->max);
1678 if (rc)
1679 goto error_rmdir;
1680 }
1681
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001682 if (config->selinux_context) {
1683 rc = minijail_add_hook(c->jail, &setexeccon,
1684 config->selinux_context,
1685 MINIJAIL_HOOK_EVENT_PRE_EXECVE);
1686 if (rc)
1687 goto error_rmdir;
1688 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001689
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001690 if (config->pre_start_hook) {
1691 rc = minijail_add_hook(c->jail, config->pre_start_hook,
1692 config->pre_start_hook_payload,
1693 MINIJAIL_HOOK_EVENT_PRE_EXECVE);
1694 if (rc)
1695 goto error_rmdir;
1696 }
1697
1698 for (i = 0; i < config->inherited_fd_count; i++) {
1699 rc = minijail_preserve_fd(c->jail, config->inherited_fds[i],
1700 config->inherited_fds[i]);
1701 if (rc)
1702 goto error_rmdir;
1703 }
1704
Dylan Reid3da683b2016-04-05 03:35:35 -07001705 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1706 minijail_skip_remount_private(c->jail);
1707
Dylan Reidc4335842016-11-11 10:24:52 -08001708 if (!config->keep_fds_open)
1709 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001710
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001711 if (config->use_capmask) {
1712 minijail_use_caps(c->jail, config->capmask);
1713 if (config->use_capmask_ambient) {
1714 minijail_set_ambient_caps(c->jail);
1715 }
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001716 if (config->securebits_skip_mask) {
1717 minijail_skip_setting_securebits(c->jail,
1718 config->securebits_skip_mask);
1719 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001720 }
1721
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001722 if (!config->do_init)
1723 minijail_run_as_init(c->jail);
1724
Dylan Reid837c74a2016-01-22 17:25:21 -08001725 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001726 config->program_argv[0],
1727 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001728 &c->init_pid, NULL, NULL,
1729 NULL);
1730 if (rc)
1731 goto error_rmdir;
1732 return 0;
1733
1734error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001735 if (!rc)
1736 rc = -errno;
1737 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001738 return rc;
1739}
1740
1741const char *container_root(struct container *c)
1742{
1743 return c->runfs;
1744}
1745
1746int container_pid(struct container *c)
1747{
1748 return c->init_pid;
1749}
1750
1751static int container_teardown(struct container *c)
1752{
Dylan Reid837c74a2016-01-22 17:25:21 -08001753 int ret = 0;
1754
Dylan Reide040c6b2016-05-02 18:49:02 -07001755 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001756 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001757 if (umount(c->runfsroot))
1758 ret = -errno;
1759 if (rmdir(c->runfsroot))
1760 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001761 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001762 }
1763 if (c->pid_file_path) {
1764 if (unlink(c->pid_file_path))
1765 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001766 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001767 }
1768 if (c->runfs) {
1769 if (rmdir(c->runfs))
1770 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001771 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001772 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001773 return ret;
1774}
1775
1776int container_wait(struct container *c)
1777{
Dylan Reidcf745c52016-04-22 10:18:03 -07001778 int rc;
1779
1780 do {
1781 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001782 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001783
Luis Hector Chavez945af482016-06-03 08:39:34 -07001784 // If the process had already been reaped, still perform teardown.
1785 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001786 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001787 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001788 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001789}
1790
1791int container_kill(struct container *c)
1792{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001793 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001794 return -errno;
1795 return container_wait(c);
1796}