blob: 5e3ed356f0e5675759ea2b4f335fa2f7fb12566a [file] [log] [blame]
Dylan Reid837c74a2016-01-22 17:25:21 -08001/* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
Mike Frysinger05e594e2017-01-10 02:11:08 -050010#if USE_device_mapper
11#include <libdevmapper.h>
12#endif
Dylan Reid837c74a2016-01-22 17:25:21 -080013#include <malloc.h>
14#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -070015#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/mount.h>
20#include <sys/stat.h>
21#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070022#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080023#include <unistd.h>
24
Mike Frysinger412dbd22017-01-06 01:50:34 -050025#include <linux/loop.h>
26
Dylan Reid837c74a2016-01-22 17:25:21 -080027#include "container_cgroup.h"
28#include "libcontainer.h"
29#include "libminijail.h"
30
/*
 * Frees |ptr| and then resets it to NULL so no dangling pointer is left
 * behind. |ptr| must be a modifiable lvalue; it is evaluated twice, so it
 * must not have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
36
Yusuke Sato91f11f02016-12-02 16:15:13 -080037#define MAX_NUM_SETFILES_ARGS 128
Dylan Reid93fa4602017-06-06 13:39:31 -070038#define MAX_RLIMITS 32 // Linux defines 15 at the time of writing.
Yusuke Sato91f11f02016-12-02 16:15:13 -080039
Mike Frysinger412dbd22017-01-06 01:50:34 -050040static const char loopdev_ctl[] = "/dev/loop-control";
Mike Frysinger05e594e2017-01-10 02:11:08 -050041#if USE_device_mapper
42static const char dm_dev_prefix[] = "/dev/mapper/";
43#endif
Mike Frysinger412dbd22017-01-06 01:50:34 -050044
Luis Hector Chavez945af482016-06-03 08:39:34 -070045static int container_teardown(struct container *c);
46
/*
 * Replaces the string owned by |*dest| with a heap copy of |src|.
 *
 * The previous value is released only after the copy succeeded, so |*dest|
 * is left untouched on failure.
 *
 * Returns 0 on success, -EINVAL if |src| is NULL, or -ENOMEM if the copy
 * could not be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined; reject it instead of crashing. */
	if (!src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(*dest);
	*dest = copy;
	return 0;
}
57
/* One mount entry, as recorded by container_config_add_mount(). */
struct container_mount {
	/* Strings copied from the caller; |data| and |verity| may be NULL. */
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;
	char *verity;

	int flags;
	int uid;
	int gid;
	int mode;

	/* Non-zero when the mount should happen in the new vfs namespace. */
	int mount_in_ns;
	/* Non-zero when the target should be created if it doesn't exist. */
	int create;
	/* Non-zero when the target should be mounted via loopback. */
	int loopback;
};
73
/* One device node entry, as recorded by container_config_add_device(). */
struct container_device {
	/* 'c' for a character device, 'b' for a block device. */
	char type;
	char *path;
	int fs_permissions;
	int major;
	int minor;
	/* Copy the minor from the existing node; |minor| is then ignored. */
	int copy_minor;
	int uid;
	int gid;
};
84
/*
 * One device cgroup rule, as recorded by
 * container_config_add_cgroup_device().
 */
struct container_cgroup_device {
	int allow;
	char type;
	/* For |major| and |minor|, -1 means all. */
	int major;
	int minor;
	int read;
	int write;
	int modify;
};
94
/*
 * CPU cgroup parameters; filled in by container_config_set_cpu_shares(),
 * container_config_set_cpu_cfs_params() and
 * container_config_set_cpu_rt_params().
 */
struct container_cpu_cgroup {
	int shares;
	/* CFS pair. */
	int quota;
	int period;
	/* Realtime pair. */
	int rt_runtime;
	int rt_period;
};
102
/* One resource limit, as recorded by container_config_add_rlimit(). */
struct container_rlimit {
	int type;
	/* Current and maximum values as passed in by the caller. */
	uint32_t cur;
	uint32_t max;
};
108
Dylan Reid837c74a2016-01-22 17:25:21 -0800109/*
110 * Structure that configures how the container is run.
111 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500112 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -0800113 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700114 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700115 * premounted_runfs - Path to where the container will be run.
116 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -0800117 * program_argv - The program to run and args, e.g. "/sbin/init".
118 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -0700119 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800120 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -0700121 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800122 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
123 * alt_syscall_table - Syscall table to use or NULL if none.
124 * mounts - Filesystems to mount in the new namespace.
125 * num_mounts - Number of above.
126 * devices - Device nodes to create.
127 * num_devices - Number of above.
Dylan Reid4843d6b2017-03-31 18:14:30 -0700128 * cgroup_devices - Device node cgroup permissions.
129 * num_cgroup_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700130 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800131 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700132 * cgroup_parent - Parent dir for cgroup creation
133 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700134 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700135 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800136 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid93fa4602017-06-06 13:39:31 -0700137 * rlimits - Array of rlimits for the contained process.
138 * num_rlimits - The number of elements in `rlimits`.
Dylan Reid837c74a2016-01-22 17:25:21 -0800139 */
140struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500141 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800142 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700143 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700144 char *premounted_runfs;
145 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800146 char **program_argv;
147 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700148 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800149 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700150 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800151 char *gid_map;
152 char *alt_syscall_table;
153 struct container_mount *mounts;
154 size_t num_mounts;
155 struct container_device *devices;
156 size_t num_devices;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700157 struct container_cgroup_device *cgroup_devices;
158 size_t num_cgroup_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700159 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800160 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700161 char *cgroup_parent;
162 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700163 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700164 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800165 int keep_fds_open;
Dylan Reid93fa4602017-06-06 13:39:31 -0700166 struct container_rlimit rlimits[MAX_RLIMITS];
167 int num_rlimits;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700168 int use_capmask;
169 int use_capmask_ambient;
170 uint64_t capmask;
Dylan Reid837c74a2016-01-22 17:25:21 -0800171};
172
173struct container_config *container_config_create()
174{
175 return calloc(1, sizeof(struct container_config));
176}
177
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700178static void container_free_program_args(struct container_config *c)
179{
180 int i;
181
182 if (!c->program_argv)
183 return;
184 for (i = 0; i < c->num_args; ++i) {
185 FREE_AND_NULL(c->program_argv[i]);
186 }
187 FREE_AND_NULL(c->program_argv);
188}
189
190static void container_config_free_mount(struct container_mount *mount)
191{
192 FREE_AND_NULL(mount->name);
193 FREE_AND_NULL(mount->source);
194 FREE_AND_NULL(mount->destination);
195 FREE_AND_NULL(mount->type);
196 FREE_AND_NULL(mount->data);
197}
198
199static void container_config_free_device(struct container_device *device)
200{
201 FREE_AND_NULL(device->path);
202}
203
Dylan Reid837c74a2016-01-22 17:25:21 -0800204void container_config_destroy(struct container_config *c)
205{
206 size_t i;
207
208 if (c == NULL)
209 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700210 FREE_AND_NULL(c->rootfs);
211 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700212 FREE_AND_NULL(c->premounted_runfs);
213 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700214 FREE_AND_NULL(c->uid_map);
215 FREE_AND_NULL(c->gid_map);
216 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800217 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700218 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800219 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700220 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800221 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700222 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800223 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700224 FREE_AND_NULL(c->devices);
Dylan Reida34f8162017-05-10 11:33:11 -0700225 FREE_AND_NULL(c->cgroup_devices);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700226 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700227 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700228 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800229}
230
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500231int container_config_config_root(struct container_config *c,
232 const char *config_root)
233{
234 return strdup_and_free(&c->config_root, config_root);
235}
236
237const char *container_config_get_config_root(const struct container_config *c)
238{
239 return c->config_root;
240}
241
Dylan Reid837c74a2016-01-22 17:25:21 -0800242int container_config_rootfs(struct container_config *c, const char *rootfs)
243{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700244 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800245}
246
Dylan Reid11456722016-05-02 11:24:50 -0700247const char *container_config_get_rootfs(const struct container_config *c)
248{
249 return c->rootfs;
250}
251
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700252void container_config_rootfs_mount_flags(struct container_config *c,
253 unsigned long rootfs_mount_flags)
254{
255 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
256 * simply check against zero later. MS_BIND is also added to avoid
257 * re-mounting the original filesystem, since the rootfs is always
258 * bind-mounted.
259 */
260 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
261}
262
263unsigned long container_config_get_rootfs_mount_flags(
264 const struct container_config *c)
265{
266 return c->rootfs_mount_flags;
267}
268
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700269int container_config_premounted_runfs(struct container_config *c, const char *runfs)
270{
271 return strdup_and_free(&c->premounted_runfs, runfs);
272}
273
274const char *container_config_get_premounted_runfs(const struct container_config *c)
275{
276 return c->premounted_runfs;
277}
278
279int container_config_pid_file(struct container_config *c, const char *path)
280{
281 return strdup_and_free(&c->pid_file_path, path);
282}
283
284const char *container_config_get_pid_file(const struct container_config *c)
285{
286 return c->pid_file_path;
287}
288
Dylan Reid837c74a2016-01-22 17:25:21 -0800289int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800290 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800291{
292 size_t i;
293
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700294 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800295 c->num_args = num_args;
296 c->program_argv = calloc(num_args + 1, sizeof(char *));
297 if (!c->program_argv)
298 return -ENOMEM;
299 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700300 if (strdup_and_free(&c->program_argv[i], argv[i]))
301 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800302 }
303 c->program_argv[num_args] = NULL;
304 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700305
306error_free_return:
307 container_free_program_args(c);
308 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800309}
310
Dylan Reid11456722016-05-02 11:24:50 -0700311size_t container_config_get_num_program_args(const struct container_config *c)
312{
313 return c->num_args;
314}
315
316const char *container_config_get_program_arg(const struct container_config *c,
317 size_t index)
318{
319 if (index >= c->num_args)
320 return NULL;
321 return c->program_argv[index];
322}
323
Dylan Reid1874feb2016-06-22 17:53:50 -0700324void container_config_uid(struct container_config *c, uid_t uid)
325{
326 c->uid = uid;
327}
328
329uid_t container_config_get_uid(const struct container_config *c)
330{
331 return c->uid;
332}
333
Dylan Reid837c74a2016-01-22 17:25:21 -0800334int container_config_uid_map(struct container_config *c, const char *uid_map)
335{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700336 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800337}
338
Dylan Reid1874feb2016-06-22 17:53:50 -0700339void container_config_gid(struct container_config *c, gid_t gid)
340{
341 c->gid = gid;
342}
343
344gid_t container_config_get_gid(const struct container_config *c)
345{
346 return c->gid;
347}
348
Dylan Reid837c74a2016-01-22 17:25:21 -0800349int container_config_gid_map(struct container_config *c, const char *gid_map)
350{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700351 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800352}
353
354int container_config_alt_syscall_table(struct container_config *c,
355 const char *alt_syscall_table)
356{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700357 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800358}
359
Dylan Reid93fa4602017-06-06 13:39:31 -0700360int container_config_add_rlimit(struct container_config *c, int type,
361 uint32_t cur, uint32_t max)
362{
363 if (c->num_rlimits >= MAX_RLIMITS) {
364 return -ENOMEM;
365 }
366 c->rlimits[c->num_rlimits].type = type;
367 c->rlimits[c->num_rlimits].cur = cur;
368 c->rlimits[c->num_rlimits].max = max;
369 c->num_rlimits++;
370 return 0;
371}
372
Dylan Reid837c74a2016-01-22 17:25:21 -0800373int container_config_add_mount(struct container_config *c,
374 const char *name,
375 const char *source,
376 const char *destination,
377 const char *type,
378 const char *data,
Mike Frysinger05e594e2017-01-10 02:11:08 -0500379 const char *verity,
Dylan Reid837c74a2016-01-22 17:25:21 -0800380 int flags,
381 int uid,
382 int gid,
383 int mode,
384 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500385 int create,
386 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800387{
388 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700389 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800390
391 if (name == NULL || source == NULL ||
392 destination == NULL || type == NULL)
393 return -EINVAL;
394
395 mount_ptr = realloc(c->mounts,
396 sizeof(c->mounts[0]) * (c->num_mounts + 1));
397 if (!mount_ptr)
398 return -ENOMEM;
399 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700400 current_mount = &c->mounts[c->num_mounts];
401 memset(current_mount, 0, sizeof(struct container_mount));
402
403 if (strdup_and_free(&current_mount->name, name))
404 goto error_free_return;
405 if (strdup_and_free(&current_mount->source, source))
406 goto error_free_return;
407 if (strdup_and_free(&current_mount->destination, destination))
408 goto error_free_return;
409 if (strdup_and_free(&current_mount->type, type))
410 goto error_free_return;
411 if (data && strdup_and_free(&current_mount->data, data))
412 goto error_free_return;
Mike Frysinger05e594e2017-01-10 02:11:08 -0500413 if (verity && strdup_and_free(&current_mount->verity, verity))
414 goto error_free_return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700415 current_mount->flags = flags;
416 current_mount->uid = uid;
417 current_mount->gid = gid;
418 current_mount->mode = mode;
419 current_mount->mount_in_ns = mount_in_ns;
420 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500421 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800422 ++c->num_mounts;
423 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700424
425error_free_return:
426 container_config_free_mount(current_mount);
427 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800428}
429
Dylan Reid4843d6b2017-03-31 18:14:30 -0700430int container_config_add_cgroup_device(struct container_config *c,
431 int allow,
432 char type,
433 int major,
434 int minor,
435 int read,
436 int write,
437 int modify)
438{
439 struct container_cgroup_device *dev_ptr;
440 struct container_cgroup_device *current_dev;
441
442 dev_ptr = realloc(c->cgroup_devices,
443 sizeof(c->cgroup_devices[0]) *
444 (c->num_cgroup_devices + 1));
445 if (!dev_ptr)
446 return -ENOMEM;
447 c->cgroup_devices = dev_ptr;
448
449 current_dev = &c->cgroup_devices[c->num_cgroup_devices];
450 memset(current_dev, 0, sizeof(struct container_cgroup_device));
451 current_dev->allow = allow;
452 current_dev->type = type;
453 current_dev->major = major;
454 current_dev->minor = minor;
455 current_dev->read = read;
456 current_dev->write = write;
457 current_dev->modify = modify;
458 ++c->num_cgroup_devices;
459
460 return 0;
461}
462
Dylan Reid837c74a2016-01-22 17:25:21 -0800463int container_config_add_device(struct container_config *c,
464 char type,
465 const char *path,
466 int fs_permissions,
467 int major,
468 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700469 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800470 int uid,
471 int gid,
472 int read_allowed,
473 int write_allowed,
474 int modify_allowed)
475{
476 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700477 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800478
479 if (path == NULL)
480 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700481 /* If using a dynamic minor number, ensure that minor is -1. */
482 if (copy_minor && (minor != -1))
483 return -EINVAL;
484
Dylan Reid837c74a2016-01-22 17:25:21 -0800485 dev_ptr = realloc(c->devices,
486 sizeof(c->devices[0]) * (c->num_devices + 1));
487 if (!dev_ptr)
488 return -ENOMEM;
489 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700490 current_dev = &c->devices[c->num_devices];
491 memset(current_dev, 0, sizeof(struct container_device));
492
493 current_dev->type = type;
494 if (strdup_and_free(&current_dev->path, path))
495 goto error_free_return;
496 current_dev->fs_permissions = fs_permissions;
497 current_dev->major = major;
498 current_dev->minor = minor;
499 current_dev->copy_minor = copy_minor;
500 current_dev->uid = uid;
501 current_dev->gid = gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700502 if (read_allowed || write_allowed || modify_allowed) {
503 if (container_config_add_cgroup_device(c,
504 1,
505 type,
506 major,
507 minor,
508 read_allowed,
509 write_allowed,
510 modify_allowed))
511 goto error_free_return;
512 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800513 ++c->num_devices;
514 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700515
516error_free_return:
517 container_config_free_device(current_dev);
518 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800519}
520
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700521int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700522 const char *setfiles_cmd)
523{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700524 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700525}
Dylan Reid837c74a2016-01-22 17:25:21 -0800526
Dylan Reid11456722016-05-02 11:24:50 -0700527const char *container_config_get_run_setfiles(const struct container_config *c)
528{
529 return c->run_setfiles;
530}
531
Chinyue Chenfac909e2016-06-24 14:17:42 +0800532int container_config_set_cpu_shares(struct container_config *c, int shares)
533{
534 /* CPU shares must be 2 or higher. */
535 if (shares < 2)
536 return -EINVAL;
537
538 c->cpu_cgparams.shares = shares;
539 return 0;
540}
541
542int container_config_set_cpu_cfs_params(struct container_config *c,
543 int quota,
544 int period)
545{
546 /*
547 * quota could be set higher than period to utilize more than one CPU.
548 * quota could also be set as -1 to indicate the cgroup does not adhere
549 * to any CPU time restrictions.
550 */
551 if (quota <= 0 && quota != -1)
552 return -EINVAL;
553 if (period <= 0)
554 return -EINVAL;
555
556 c->cpu_cgparams.quota = quota;
557 c->cpu_cgparams.period = period;
558 return 0;
559}
560
561int container_config_set_cpu_rt_params(struct container_config *c,
562 int rt_runtime,
563 int rt_period)
564{
565 /*
566 * rt_runtime could be set as 0 to prevent the cgroup from using
567 * realtime CPU.
568 */
569 if (rt_runtime < 0 || rt_runtime >= rt_period)
570 return -EINVAL;
571
572 c->cpu_cgparams.rt_runtime = rt_runtime;
573 c->cpu_cgparams.rt_period = rt_period;
574 return 0;
575}
576
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800577int container_config_get_cpu_shares(struct container_config *c)
578{
579 return c->cpu_cgparams.shares;
580}
581
582int container_config_get_cpu_quota(struct container_config *c)
583{
584 return c->cpu_cgparams.quota;
585}
586
587int container_config_get_cpu_period(struct container_config *c)
588{
589 return c->cpu_cgparams.period;
590}
591
592int container_config_get_cpu_rt_runtime(struct container_config *c)
593{
594 return c->cpu_cgparams.rt_runtime;
595}
596
597int container_config_get_cpu_rt_period(struct container_config *c)
598{
599 return c->cpu_cgparams.rt_period;
600}
601
Dylan Reid9e724af2016-07-21 09:58:07 -0700602int container_config_set_cgroup_parent(struct container_config *c,
603 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700604 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700605{
606 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700607 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700608 return strdup_and_free(&c->cgroup_parent, parent);
609}
610
611const char *container_config_get_cgroup_parent(struct container_config *c)
612{
613 return c->cgroup_parent;
614}
615
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700616void container_config_share_host_netns(struct container_config *c)
617{
618 c->share_host_netns = 1;
619}
620
621int get_container_config_share_host_netns(struct container_config *c)
622{
623 return c->share_host_netns;
624}
625
Dylan Reidc4335842016-11-11 10:24:52 -0800626void container_config_keep_fds_open(struct container_config *c)
627{
628 c->keep_fds_open = 1;
629}
630
Luis Hector Chavezff5978f2017-06-27 12:52:58 -0700631void container_config_set_capmask(struct container_config *c,
632 uint64_t capmask,
633 int ambient)
634{
635 c->use_capmask = 1;
636 c->capmask = capmask;
637 c->use_capmask_ambient = ambient;
638}
639
/*
 * Container manipulation
 */

/* State of one container instance, allocated by container_new(). */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *config_root;
	char *runfs;
	/* Copy of the |rundir| argument of container_new(). */
	char *rundir;
	char *runfsroot;
	char *pid_file_path;
	/* Mounts made outside of the minijail, and how many. */
	char **ext_mounts;
	size_t num_ext_mounts;
	char **loopdevs;
	size_t num_loopdevs;
	char **device_mappers;
	size_t num_device_mappers;
	/* Copy of the |name| argument of container_new(). */
	char *name;
};
660
661struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700662 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800663{
664 struct container *c;
665
Dylan Reid837c74a2016-01-22 17:25:21 -0800666 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700667 if (!c)
668 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800669 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700670 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700671 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700672 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800673 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700674 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800675 return c;
676}
677
678void container_destroy(struct container *c)
679{
Dylan Reid684975e2016-05-02 15:44:47 -0700680 if (c->cgroup)
681 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700682 if (c->jail)
683 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500684 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700685 FREE_AND_NULL(c->name);
686 FREE_AND_NULL(c->rundir);
687 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800688}
689
/*
 * Translates |id| from inside the user namespace to the equivalent id
 * outside of it.
 *
 * |map| is a comma separated list of "inside outside length" entries, e.g.
 * "0 100000 1024"; each entry covers the ids [inside, inside + length).
 * A NULL |map| means no mapping was configured, in which case |id| is
 * returned unchanged.
 *
 * Returns the outside id (>= 0) on success. On failure returns a negative
 * errno and also stores that code in errno: -ENOMEM if |map| cannot be
 * copied, -ERANGE if a number does not fit in an int, -EINVAL if an entry is
 * malformed or no entry covers |id|.
 */
static int get_userns_outside_id(const char *map, int id)
{
	char *map_copy, *mapping, *saveptr1, *saveptr2;
	int result = -EINVAL;

	if (!map)
		return id;

	/* strtok_r() modifies its input, so tokenize a private copy. */
	map_copy = strdup(map);
	if (!map_copy) {
		errno = ENOMEM;
		return -ENOMEM;
	}

	for (mapping = strtok_r(map_copy, ",", &saveptr1); mapping;
	     mapping = strtok_r(NULL, ",", &saveptr1)) {
		/* Index 0 is inside, 1 is outside, 2 is length. */
		const char *field[3];
		long value[3];
		long long outside;
		size_t i;

		field[0] = strtok_r(mapping, " ", &saveptr2);
		field[1] = strtok_r(NULL, " ", &saveptr2);
		/* An empty delimiter set yields the rest of the entry. */
		field[2] = strtok_r(NULL, "", &saveptr2);

		for (i = 0; i < 3; ++i) {
			char *end;

			/* A truncated entry leaves a field NULL; never parse it. */
			if (!field[i])
				goto exit;
			errno = 0;
			value[i] = strtol(field[i], &end, 10);
			/* The round trip through int detects values too big for it. */
			if (errno == ERANGE || value[i] != (long)(int)value[i]) {
				result = -ERANGE;
				goto exit;
			}
			if (end == field[i] || value[i] < 0)
				goto exit;
		}

		/*
		 * The entry covers [inside, inside + length). Comparing the
		 * offset instead of computing inside + length avoids overflow.
		 */
		if (id < value[0] || id - value[0] >= value[2])
			continue;

		outside = (long long)value[1] + (id - value[0]);
		if (outside != (long long)(int)outside)
			result = -ERANGE;
		else
			result = (int)outside;
		goto exit;
	}

exit:
	free(map_copy);
	if (result < 0)
		errno = -result;
	return result;
}
732
/*
 * Creates the directory |path| and then applies |mode| and the |uid|/|gid|
 * ownership to it, in that order.
 *
 * Returns 0 on success or the negative errno of the first step that fails.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* Short-circuit evaluation keeps the mkdir, chmod, chown order. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
743
/*
 * Opens |path| read-write, creating it with |mode| if it does not exist,
 * and sets its ownership to |uid|/|gid|.
 *
 * Returns 0 on success or the negative errno of the failing open() or
 * fchown() call.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Capture the error now; close() below may overwrite errno. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);
	return rc;
}
757
758/* Make sure the mount target exists in the new rootfs. Create if needed and
759 * possible.
760 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800761static int setup_mount_destination(const struct container_config *config,
762 const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700763 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800764 const char *dest)
765{
Stephen Barber1a398c72017-01-23 12:39:44 -0800766 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -0800767 int rc;
768 struct stat st_buf;
769
770 rc = stat(dest, &st_buf);
771 if (rc == 0) /* destination exists */
772 return 0;
773
774 /* Try to create the destination. Either make directory or touch a file
775 * depending on the source type.
776 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800777 uid_userns = get_userns_outside_id(config->uid_map, mnt->uid);
778 if (uid_userns < 0)
779 return uid_userns;
780 gid_userns = get_userns_outside_id(config->gid_map, mnt->gid);
781 if (gid_userns < 0)
782 return gid_userns;
783
Dylan Reid2149be92016-04-28 18:38:57 -0700784 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800785 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
Stephen Barber1a398c72017-01-23 12:39:44 -0800786 return make_dir(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800787
Stephen Barber1a398c72017-01-23 12:39:44 -0800788 return touch_file(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800789}
790
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700791/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700792static int run_setfiles_command(const struct container *c,
793 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800794 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700795{
796 int rc;
797 int status;
798 int pid;
799 char *context_path;
800
Dylan Reide040c6b2016-05-02 18:49:02 -0700801 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700802 return 0;
803
804 if (asprintf(&context_path, "%s/file_contexts",
805 c->runfsroot) < 0)
806 return -errno;
807
808 pid = fork();
809 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800810 size_t i;
811 size_t arg_index = 0;
812 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700813 const char *env[] = {
814 NULL,
815 };
816
Yusuke Sato91f11f02016-12-02 16:15:13 -0800817 argv[arg_index++] = config->run_setfiles;
818 argv[arg_index++] = "-r";
819 argv[arg_index++] = c->runfsroot;
820 argv[arg_index++] = context_path;
821 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
822 _exit(-E2BIG);
823 for (i = 0; i < num_destinations; ++i) {
824 argv[arg_index++] = destinations[i];
825 }
826 argv[arg_index] = NULL;
827
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700828 execve(argv[0], (char *const*)argv, (char *const*)env);
829
830 /* Command failed to exec if execve returns. */
831 _exit(-errno);
832 }
833 free(context_path);
834 if (pid < 0)
835 return -errno;
836 do {
837 rc = waitpid(pid, &status, 0);
838 } while (rc == -1 && errno == EINTR);
839 if (rc < 0)
840 return -errno;
841 return status;
842}
843
Mike Frysinger412dbd22017-01-06 01:50:34 -0500844/* Find a free loop device and attach it. */
845static int loopdev_setup(char **loopdev_ret, const char *source)
846{
847 int ret = 0;
848 int source_fd = -1;
849 int control_fd = -1;
850 int loop_fd = -1;
851 char *loopdev = NULL;
852
853 source_fd = open(source, O_RDONLY|O_CLOEXEC);
854 if (source_fd < 0)
855 goto error;
856
857 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
858 if (control_fd < 0)
859 goto error;
860
861 while (1) {
862 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
863 if (num < 0)
864 goto error;
865
866 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
867 goto error;
868
869 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
870 if (loop_fd < 0)
871 goto error;
872
873 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
874 break;
875
876 if (errno != EBUSY)
877 goto error;
878
879 /* Clean up resources for the next pass. */
880 free(loopdev);
881 close(loop_fd);
882 }
883
884 *loopdev_ret = loopdev;
885 goto exit;
886
887error:
888 ret = -errno;
889 free(loopdev);
890exit:
891 if (source_fd != -1)
892 close(source_fd);
893 if (control_fd != -1)
894 close(control_fd);
895 if (loop_fd != -1)
896 close(loop_fd);
897 return ret;
898}
899
/*
 * Detach the backing file from the loop device at |loopdev|.
 *
 * Returns 0 on success, or a negative errno value if the device node could
 * not be opened or the LOOP_CLR_FD ioctl failed.
 */
static int loopdev_detach(const char *loopdev)
{
	int status = 0;
	int loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);

	/* Nothing to close if the open itself failed. */
	if (loop_fd < 0)
		return -errno;

	/* Record the failure before close() gets a chance to touch errno. */
	if (ioctl(loop_fd, LOOP_CLR_FD) < 0)
		status = -errno;

	close(loop_fd);
	return status;
}
921
/*
 * Create a new read-only device mapper target for |source|.
 *
 * |verity_cmdline| is parsed as "<start> <size> <target type> <params...>";
 * every "@DEV@" token in it is replaced with |source| before parsing.
 *
 * On success returns 0, stores the /dev path of the new node in
 * |*dm_path_ret| and the mapper name in |*dm_name_ret|; both are malloc()ed
 * and owned by the caller.  On failure returns a negative errno value and
 * does not hand out any allocation.  When device-mapper support is compiled
 * out, both outputs are set to NULL and -ENOTSUP is returned.
 */
static int dm_setup(char **dm_path_ret, char **dm_name_ret, const char *source,
		    const char *verity_cmdline)
{
#if USE_device_mapper
	static const char dev_token[] = "@DEV@";
	const size_t token_len = sizeof(dev_token) - 1;
	const size_t source_len = strlen(source);
	int ret = 0;
	char *p;
	const char *in;
	char *out;
	size_t num_tokens = 0;
	char *dm_path = NULL;
	char *dm_name = NULL;
	char *verity = NULL;
	struct dm_task *dmt = NULL;
	uint32_t cookie = 0;
	char ttype[20];
	unsigned long long start, size;
	int n = 0;

	/* Normalize the name into something unique-esque. */
	if (asprintf(&dm_name, "cros-containers-%s", source) < 0) {
		dm_name = NULL;
		ret = -ENOMEM;
		goto error;
	}
	p = dm_name;
	while ((p = strchr(p, '/')) != NULL)
		*p++ = '_';

	/* Get the /dev path for the higher levels to mount. */
	if (asprintf(&dm_path, "%s%s", dm_dev_prefix, dm_name) < 0) {
		dm_path = NULL;
		ret = -ENOMEM;
		goto error;
	}

	/*
	 * Insert the source path in the verity command line.  Count the
	 * tokens first so the buffer is large enough for any number of
	 * substitutions (this slightly over-allocates by the token bytes
	 * that get removed).
	 */
	for (in = verity_cmdline; (in = strstr(in, dev_token)) != NULL;
	     in += token_len)
		num_tokens++;

	verity = malloc(strlen(verity_cmdline) + num_tokens * source_len + 1);
	if (!verity) {
		ret = -ENOMEM;
		goto error;
	}

	/*
	 * Copy from the original string into the new buffer so already
	 * substituted text is never rescanned; a |source| that itself
	 * contains the token therefore cannot cause an endless loop.
	 */
	out = verity;
	in = verity_cmdline;
	while ((p = strstr(in, dev_token)) != NULL) {
		size_t chunk = (size_t)(p - in);

		memcpy(out, in, chunk);
		out += chunk;
		memcpy(out, source, source_len);
		out += source_len;
		in = p + token_len;
	}
	strcpy(out, in);

	/* Extract the first three parameters for dm-verity settings. */
	if (sscanf(verity, "%llu %llu %10s %n", &start, &size, ttype, &n) != 3) {
		/* sscanf() does not set errno for a plain mismatch. */
		ret = -EINVAL;
		goto error;
	}

	/* Finally create the device mapper. */
	dmt = dm_task_create(DM_DEVICE_CREATE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_set_ro(dmt))
		goto error;

	/* verity + n points just past the target type, at the parameters. */
	if (!dm_task_add_target(dmt, start, size, ttype, verity + n))
		goto error;

	if (!dm_task_set_cookie(dmt, &cookie, 0))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	/* Make sure the node exists before we continue. */
	dm_udev_wait(cookie);

	*dm_path_ret = dm_path;
	*dm_name_ret = dm_name;
	goto exit;

error:
	/*
	 * Never report success from the error path: fall back to errno,
	 * and to -EIO when the failing call left errno at zero.
	 */
	if (!ret)
		ret = errno ? -errno : -EIO;
	free(dm_name);
	free(dm_path);
exit:
	free(verity);
	if (dmt)
		dm_task_destroy(dmt);
	return ret;
#else
	/* No device-mapper support: fail loudly instead of returning
	 * success with the output pointers left unset. */
	(void)source;
	(void)verity_cmdline;
	*dm_path_ret = NULL;
	*dm_name_ret = NULL;
	return -ENOTSUP;
#endif
}
1000
/*
 * Tear down the device mapper target named |dm_name|.
 *
 * Returns 0 on success (always 0 when device-mapper support is compiled
 * out), or a negative errno value on failure.
 */
static int dm_detach(const char *dm_name)
{
	int ret = 0;
#if USE_device_mapper
	struct dm_task *dmt;

	dmt = dm_task_create(DM_DEVICE_REMOVE);
	/* No task was created, so there is nothing to destroy. */
	if (dmt == NULL)
		return errno ? -errno : -EIO;

	/* Fall back to -EIO when the failing call left errno at zero. */
	if (!dm_task_set_name(dmt, dm_name) || !dm_task_run(dmt))
		ret = errno ? -errno : -EIO;

	dm_task_destroy(dmt);
#else
	(void)dm_name;
#endif
	return ret;
}
1027
Dylan Reide040c6b2016-05-02 18:49:02 -07001028/*
1029 * Unmounts anything we mounted in this mount namespace in the opposite order
1030 * that they were mounted.
1031 */
1032static int unmount_external_mounts(struct container *c)
1033{
1034 int ret = 0;
1035
1036 while (c->num_ext_mounts) {
1037 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001038 if (!c->ext_mounts[c->num_ext_mounts])
1039 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -07001040 if (umount(c->ext_mounts[c->num_ext_mounts]))
1041 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001042 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -07001043 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001044 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001045
1046 while (c->num_loopdevs) {
1047 c->num_loopdevs--;
1048 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
1049 ret = -errno;
1050 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
1051 }
1052 FREE_AND_NULL(c->loopdevs);
1053
Mike Frysinger05e594e2017-01-10 02:11:08 -05001054 while (c->num_device_mappers) {
1055 c->num_device_mappers--;
1056 if (dm_detach(c->device_mappers[c->num_device_mappers]))
1057 ret = -errno;
1058 FREE_AND_NULL(c->device_mappers[c->num_device_mappers]);
1059 }
1060 FREE_AND_NULL(c->device_mappers);
1061
Dylan Reide040c6b2016-05-02 18:49:02 -07001062 return ret;
1063}
1064
Junichi Uekawa5d272772016-07-21 16:07:19 +09001065/*
1066 * Match mount_one in minijail, mount one mountpoint with
1067 * consideration for combination of MS_BIND/MS_RDONLY flag.
1068 */
1069static int mount_external(const char *src, const char *dest, const char *type,
1070 unsigned long flags, const void *data)
1071{
1072 int remount_ro = 0;
1073
1074 /*
1075 * R/O bind mounts have to be remounted since 'bind' and 'ro'
1076 * can't both be specified in the original bind mount.
1077 * Remount R/O after the initial mount.
1078 */
1079 if ((flags & MS_BIND) && (flags & MS_RDONLY)) {
1080 remount_ro = 1;
1081 flags &= ~MS_RDONLY;
1082 }
1083
1084 if (mount(src, dest, type, flags, data) == -1)
1085 return -1;
1086
1087 if (remount_ro) {
1088 flags |= MS_RDONLY;
1089 if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1)
1090 return -1;
1091 }
1092
1093 return 0;
1094}
1095
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001096static int do_container_mount(struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001097 const struct container_config *config,
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001098 const struct container_mount *mnt)
1099{
Mike Frysinger05e594e2017-01-10 02:11:08 -05001100 char *dm_source = NULL;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001101 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001102 char *source = NULL;
1103 char *dest = NULL;
1104 int rc = 0;
1105
1106 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
1107 return -errno;
1108
1109 /*
1110 * If it's a bind mount relative to rootfs, append source to
1111 * rootfs path, otherwise source path is absolute.
1112 */
1113 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
1114 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
1115 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001116 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
1117 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
1118 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001119 } else {
1120 if (asprintf(&source, "%s", mnt->source) < 0)
1121 goto error_free_return;
1122 }
1123
Dylan Reidbd5234c2017-06-06 21:20:07 -07001124 // Only create the destinations for external mounts, minijail will take
1125 // care of those mounted in the new namespace.
1126 if (mnt->create && !mnt->mount_in_ns) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001127 rc = setup_mount_destination(config, mnt, source, dest);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001128 if (rc)
1129 goto error_free_return;
1130 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001131 if (mnt->loopback) {
1132 /* Record this loopback file for cleanup later. */
1133 loop_source = source;
1134 source = NULL;
1135 rc = loopdev_setup(&source, loop_source);
1136 if (rc)
1137 goto error_free_return;
1138
Mike Frysinger05e594e2017-01-10 02:11:08 -05001139 /* Save this to cleanup when shutting down. */
Mike Frysinger412dbd22017-01-06 01:50:34 -05001140 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
1141 if (rc)
1142 goto error_free_return;
1143 c->num_loopdevs++;
1144 }
Mike Frysinger05e594e2017-01-10 02:11:08 -05001145 if (mnt->verity) {
1146 /* Set this device up via dm-verity. */
1147 char *dm_name;
1148 dm_source = source;
1149 source = NULL;
1150 rc = dm_setup(&source, &dm_name, dm_source, mnt->verity);
1151 if (rc)
1152 goto error_free_return;
1153
1154 /* Save this to cleanup when shutting down. */
1155 rc = strdup_and_free(&c->device_mappers[c->num_device_mappers],
1156 dm_name);
1157 free(dm_name);
1158 if (rc)
1159 goto error_free_return;
1160 c->num_device_mappers++;
1161 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001162 if (mnt->mount_in_ns) {
1163 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -07001164 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
1165 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001166 if (rc)
1167 goto error_free_return;
1168 } else {
1169 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +09001170 if (mount_external(source, dest, mnt->type, mnt->flags,
1171 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001172 goto error_free_return;
1173 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001174 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
1175 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001176 goto error_free_return;
1177 c->num_ext_mounts++;
1178 }
1179
1180 goto exit;
1181
1182error_free_return:
1183 if (!rc)
1184 rc = -errno;
1185exit:
Mike Frysinger05e594e2017-01-10 02:11:08 -05001186 free(dm_source);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001187 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001188 free(source);
1189 free(dest);
1190 return rc;
1191}
1192
Dylan Reide040c6b2016-05-02 18:49:02 -07001193static int do_container_mounts(struct container *c,
1194 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -07001195{
1196 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001197 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -07001198
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001199 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -07001200 /*
1201 * Allocate space to track anything we mount in our mount namespace.
1202 * This over-allocates as it has space for all mounts.
1203 */
1204 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
1205 if (!c->ext_mounts)
1206 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001207 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
1208 if (!c->loopdevs)
1209 return -errno;
Mike Frysinger05e594e2017-01-10 02:11:08 -05001210 c->device_mappers = calloc(config->num_mounts, sizeof(*c->device_mappers));
1211 if (!c->device_mappers)
1212 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -07001213
1214 for (i = 0; i < config->num_mounts; ++i) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001215 rc = do_container_mount(c, config, &config->mounts[i]);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001216 if (rc)
1217 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -07001218 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001219
Dylan Reid7daf9982016-04-28 16:55:42 -07001220 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -07001221
1222error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -07001223 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001224 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -07001225}
1226
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001227static int container_create_device(const struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001228 const struct container_config *config,
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001229 const struct container_device *dev,
1230 int minor)
1231{
1232 char *path = NULL;
1233 int rc = 0;
1234 int mode;
Stephen Barber1a398c72017-01-23 12:39:44 -08001235 int uid_userns, gid_userns;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001236
1237 switch (dev->type) {
1238 case 'b':
1239 mode = S_IFBLK;
1240 break;
1241 case 'c':
1242 mode = S_IFCHR;
1243 break;
1244 default:
1245 return -EINVAL;
1246 }
1247 mode |= dev->fs_permissions;
1248
Stephen Barber1a398c72017-01-23 12:39:44 -08001249 uid_userns = get_userns_outside_id(config->uid_map, dev->uid);
1250 if (uid_userns < 0)
1251 return uid_userns;
1252 gid_userns = get_userns_outside_id(config->gid_map, dev->gid);
1253 if (gid_userns < 0)
1254 return gid_userns;
1255
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001256 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
1257 goto error_free_return;
1258 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
1259 goto error_free_return;
Stephen Barber1a398c72017-01-23 12:39:44 -08001260 if (chown(path, uid_userns, gid_userns))
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001261 goto error_free_return;
1262 if (chmod(path, dev->fs_permissions))
1263 goto error_free_return;
1264
1265 goto exit;
1266
1267error_free_return:
1268 rc = -errno;
1269exit:
1270 free(path);
1271 return rc;
1272}
1273
Stephen Barber1a398c72017-01-23 12:39:44 -08001274
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001275static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -08001276{
Dylan Reidb3621832016-03-24 10:24:57 -07001277 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -07001278 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001279 char *runfs_template = NULL;
Stephen Barber1a398c72017-01-23 12:39:44 -08001280 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -08001281
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001282 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
1283 return -ENOMEM;
1284
1285 c->runfs = mkdtemp(runfs_template);
1286 if (!c->runfs) {
1287 free(runfs_template);
1288 return -errno;
1289 }
1290
Stephen Barber1a398c72017-01-23 12:39:44 -08001291 uid_userns = get_userns_outside_id(config->uid_map, config->uid);
1292 if (uid_userns < 0)
1293 return uid_userns;
1294 gid_userns = get_userns_outside_id(config->gid_map, config->gid);
1295 if (gid_userns < 0)
1296 return gid_userns;
1297
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001298 /* Make sure the container uid can access the rootfs. */
1299 if (chmod(c->runfs, 0700))
1300 return -errno;
Stephen Barber1a398c72017-01-23 12:39:44 -08001301 if (chown(c->runfs, uid_userns, gid_userns))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001302 return -errno;
1303
1304 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
1305 return -errno;
1306
1307 if (mkdir(c->runfsroot, root_dir_mode))
1308 return -errno;
1309 if (chmod(c->runfsroot, root_dir_mode))
1310 return -errno;
1311
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001312 if (mount(rootfs, c->runfsroot, "", MS_BIND, NULL))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001313 return -errno;
1314
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001315 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1316 * second call to mount() to actually set them.
1317 */
1318 if (config->rootfs_mount_flags &&
1319 mount(rootfs, c->runfsroot, "",
1320 config->rootfs_mount_flags, NULL)) {
1321 return -errno;
1322 }
1323
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001324 return 0;
1325}
1326
Dylan Reidacedff92017-03-31 17:41:40 -07001327static int device_setup(struct container *c,
1328 const struct container_config *config)
1329{
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001330 int rc;
1331 size_t i;
Dylan Reidacedff92017-03-31 17:41:40 -07001332
1333 c->cgroup->ops->deny_all_devices(c->cgroup);
1334
Dylan Reid4843d6b2017-03-31 18:14:30 -07001335 for (i = 0; i < config->num_cgroup_devices; i++) {
1336 const struct container_cgroup_device *dev =
1337 &config->cgroup_devices[i];
1338 rc = c->cgroup->ops->add_device(c->cgroup,
1339 dev->allow,
1340 dev->major,
1341 dev->minor,
1342 dev->read,
1343 dev->write,
1344 dev->modify,
1345 dev->type);
1346 if (rc)
1347 return rc;
1348 }
1349
Dylan Reidacedff92017-03-31 17:41:40 -07001350 for (i = 0; i < config->num_devices; i++) {
1351 const struct container_device *dev = &config->devices[i];
1352 int minor = dev->minor;
1353
1354 if (dev->copy_minor) {
1355 struct stat st_buff;
1356 if (stat(dev->path, &st_buff) < 0)
1357 continue;
1358 minor = minor(st_buff.st_rdev);
1359 }
1360 if (minor >= 0) {
1361 rc = container_create_device(c, config, dev, minor);
1362 if (rc)
1363 return rc;
1364 }
Dylan Reidacedff92017-03-31 17:41:40 -07001365 }
1366
1367 for (i = 0; i < c->num_loopdevs; ++i) {
1368 struct stat st;
1369
Dylan Reid43d4e5c2017-04-05 09:40:11 -07001370 rc = stat(c->loopdevs[i], &st);
1371 if (rc < 0)
1372 return -errno;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001373 rc = c->cgroup->ops->add_device(c->cgroup, 1, major(st.st_rdev),
Dylan Reidacedff92017-03-31 17:41:40 -07001374 minor(st.st_rdev),
1375 1, 0, 0, 'b');
1376 if (rc)
1377 return rc;
1378 }
1379
1380 return 0;
1381}
1382
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001383int container_start(struct container *c, const struct container_config *config)
1384{
1385 int rc = 0;
1386 unsigned int i;
Stephen Barber1a398c72017-01-23 12:39:44 -08001387 int cgroup_uid, cgroup_gid;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001388 char **destinations;
1389 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001390
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001391 if (!c)
1392 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001393 if (!config)
1394 return -EINVAL;
1395 if (!config->program_argv || !config->program_argv[0])
1396 return -EINVAL;
1397
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001398 if (config->config_root) {
1399 c->config_root = strdup(config->config_root);
1400 if (!c->config_root) {
1401 rc = -ENOMEM;
1402 goto error_rmdir;
1403 }
1404 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001405 if (config->premounted_runfs) {
1406 c->runfs = NULL;
1407 c->runfsroot = strdup(config->premounted_runfs);
1408 if (!c->runfsroot) {
1409 rc = -ENOMEM;
1410 goto error_rmdir;
1411 }
1412 } else {
1413 rc = mount_runfs(c, config);
1414 if (rc)
1415 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001416 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001417
1418 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001419 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001420 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001421
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001422 rc = do_container_mounts(c, config);
1423 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001424 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001425
Stephen Barber1a398c72017-01-23 12:39:44 -08001426 cgroup_uid = get_userns_outside_id(config->uid_map,
1427 config->cgroup_owner);
1428 if (cgroup_uid < 0) {
1429 rc = cgroup_uid;
1430 goto error_rmdir;
1431 }
1432 cgroup_gid = get_userns_outside_id(config->gid_map,
1433 config->cgroup_group);
1434 if (cgroup_gid < 0) {
1435 rc = cgroup_gid;
1436 goto error_rmdir;
1437 }
1438
Dylan Reida9966422016-07-21 10:11:34 -07001439 c->cgroup = container_cgroup_new(c->name,
1440 "/sys/fs/cgroup",
1441 config->cgroup_parent,
Stephen Barber1a398c72017-01-23 12:39:44 -08001442 cgroup_uid,
1443 cgroup_gid);
Dylan Reida9966422016-07-21 10:11:34 -07001444 if (!c->cgroup)
1445 goto error_rmdir;
1446
Keshav Santhanam268fa032016-07-14 09:59:24 -07001447 /* Must be root to modify device cgroup or mknod */
1448 if (getuid() == 0) {
Dylan Reidacedff92017-03-31 17:41:40 -07001449 if (device_setup(c, config))
1450 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001451 }
1452
Dylan Reidd7229582016-04-27 17:08:40 -07001453 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001454 destinations = calloc(config->num_mounts, sizeof(char *));
1455 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001456 for (i = 0; i < config->num_mounts; i++) {
1457 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001458 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001459
1460 if (mnt->mount_in_ns)
1461 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001462 if (mnt->flags & MS_RDONLY)
1463 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001464
Yusuke Satod33db432016-12-05 16:24:37 -08001465 /* A hack to avoid setfiles on /data and /cache. */
1466 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001467 continue;
1468
1469 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1470 size_t j;
1471 for (j = 0; j < num_destinations; ++j) {
1472 free(destinations[j]);
1473 }
1474 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001475 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001476 }
1477
1478 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001479 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001480 if (num_destinations) {
1481 size_t i;
1482 rc = run_setfiles_command(c, config, destinations, num_destinations);
1483 for (i = 0; i < num_destinations; ++i) {
1484 free(destinations[i]);
1485 }
1486 }
1487 free(destinations);
1488 if (rc)
1489 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001490
Chinyue Chenfac909e2016-06-24 14:17:42 +08001491 /* Setup CPU cgroup params. */
1492 if (config->cpu_cgparams.shares) {
1493 rc = c->cgroup->ops->set_cpu_shares(
1494 c->cgroup, config->cpu_cgparams.shares);
1495 if (rc)
1496 goto error_rmdir;
1497 }
1498 if (config->cpu_cgparams.period) {
1499 rc = c->cgroup->ops->set_cpu_quota(
1500 c->cgroup, config->cpu_cgparams.quota);
1501 if (rc)
1502 goto error_rmdir;
1503 rc = c->cgroup->ops->set_cpu_period(
1504 c->cgroup, config->cpu_cgparams.period);
1505 if (rc)
1506 goto error_rmdir;
1507 }
1508 if (config->cpu_cgparams.rt_period) {
1509 rc = c->cgroup->ops->set_cpu_rt_runtime(
1510 c->cgroup, config->cpu_cgparams.rt_runtime);
1511 if (rc)
1512 goto error_rmdir;
1513 rc = c->cgroup->ops->set_cpu_rt_period(
1514 c->cgroup, config->cpu_cgparams.rt_period);
1515 if (rc)
1516 goto error_rmdir;
1517 }
1518
Dylan Reid837c74a2016-01-22 17:25:21 -08001519 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001520 if (config->pid_file_path) {
1521 c->pid_file_path = strdup(config->pid_file_path);
1522 if (!c->pid_file_path) {
1523 rc = -ENOMEM;
1524 goto error_rmdir;
1525 }
1526 } else if (c->runfs) {
1527 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1528 rc = -ENOMEM;
1529 goto error_rmdir;
1530 }
1531 }
1532
1533 if (c->pid_file_path)
1534 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001535 minijail_reset_signal_mask(c->jail);
1536
1537 /* Setup container namespaces. */
1538 minijail_namespace_ipc(c->jail);
1539 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001540 if (!config->share_host_netns)
1541 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001542 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001543 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001544 if (getuid() != 0)
1545 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001546 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001547 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001548 if (rc)
1549 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001550 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001551 if (rc)
1552 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001553
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001554 /* Set the UID/GID inside the container if not 0. */
Stephen Barber1a398c72017-01-23 12:39:44 -08001555 if (get_userns_outside_id(config->uid_map, config->uid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001556 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001557 else if (config->uid > 0)
1558 minijail_change_uid(c->jail, config->uid);
1559 if (get_userns_outside_id(config->gid_map, config->gid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001560 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001561 else if (config->gid > 0)
1562 minijail_change_gid(c->jail, config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001563
Dylan Reid837c74a2016-01-22 17:25:21 -08001564 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1565 if (rc)
1566 goto error_rmdir;
1567
1568 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001569 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1570 if (c->cgroup->cgroup_tasks_paths[i]) {
1571 rc = minijail_add_to_cgroup(c->jail,
1572 c->cgroup->cgroup_tasks_paths[i]);
1573 if (rc)
1574 goto error_rmdir;
1575 }
1576 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001577
Dylan Reide040c6b2016-05-02 18:49:02 -07001578 if (config->alt_syscall_table)
1579 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001580
Dylan Reid93fa4602017-06-06 13:39:31 -07001581 for (i = 0; i < config->num_rlimits; i++) {
1582 const struct container_rlimit *lim = &config->rlimits[i];
1583 rc = minijail_rlimit(c->jail, lim->type, lim->cur,
1584 lim->max);
1585 if (rc)
1586 goto error_rmdir;
1587 }
1588
1589
Dylan Reid837c74a2016-01-22 17:25:21 -08001590 minijail_run_as_init(c->jail);
1591
Dylan Reid3da683b2016-04-05 03:35:35 -07001592 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1593 minijail_skip_remount_private(c->jail);
1594
Dylan Reidc4335842016-11-11 10:24:52 -08001595 if (!config->keep_fds_open)
1596 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001597
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001598 if (config->use_capmask) {
1599 minijail_use_caps(c->jail, config->capmask);
1600 if (config->use_capmask_ambient) {
1601 minijail_set_ambient_caps(c->jail);
1602 }
1603 }
1604
Dylan Reid837c74a2016-01-22 17:25:21 -08001605 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001606 config->program_argv[0],
1607 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001608 &c->init_pid, NULL, NULL,
1609 NULL);
1610 if (rc)
1611 goto error_rmdir;
1612 return 0;
1613
1614error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001615 if (!rc)
1616 rc = -errno;
1617 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001618 return rc;
1619}
1620
1621const char *container_root(struct container *c)
1622{
1623 return c->runfs;
1624}
1625
1626int container_pid(struct container *c)
1627{
1628 return c->init_pid;
1629}
1630
1631static int container_teardown(struct container *c)
1632{
Dylan Reid837c74a2016-01-22 17:25:21 -08001633 int ret = 0;
1634
Dylan Reide040c6b2016-05-02 18:49:02 -07001635 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001636 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001637 if (umount(c->runfsroot))
1638 ret = -errno;
1639 if (rmdir(c->runfsroot))
1640 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001641 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001642 }
1643 if (c->pid_file_path) {
1644 if (unlink(c->pid_file_path))
1645 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001646 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001647 }
1648 if (c->runfs) {
1649 if (rmdir(c->runfs))
1650 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001651 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001652 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001653 return ret;
1654}
1655
1656int container_wait(struct container *c)
1657{
Dylan Reidcf745c52016-04-22 10:18:03 -07001658 int rc;
1659
1660 do {
1661 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001662 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001663
Luis Hector Chavez945af482016-06-03 08:39:34 -07001664 // If the process had already been reaped, still perform teardown.
1665 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001666 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001667 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001668 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001669}
1670
1671int container_kill(struct container *c)
1672{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001673 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001674 return -errno;
1675 return container_wait(c);
1676}