/* Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#define _GNU_SOURCE /* For asprintf */
7
#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <linux/loop.h>

#include "container_cgroup.h"
#include "libcontainer.h"
#include "libminijail.h"
26
/*
 * Frees |ptr| and resets it to NULL so that a later free() or NULL check on
 * the same variable is safe. |ptr| must be a modifiable lvalue; it is
 * evaluated twice, so it must not have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
32
/* Size of the argv array built for the setfiles command (NULL included). */
#define MAX_NUM_SETFILES_ARGS 128

/* Control node queried for a free loop device number. */
static const char loopdev_ctl[] = "/dev/loop-control";

/* Forward declaration. */
static int container_teardown(struct container *c);
38
/*
 * Replaces the string owned by |*dest| with a heap-allocated copy of |src|.
 *
 * The previous value of |*dest| (which may be NULL) is released only after
 * the copy has been made, so |*dest| is left untouched on failure.
 *
 * Returns 0 on success, -EINVAL if |dest| or |src| is NULL, or -ENOMEM if the
 * copy could not be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behavior; report it instead of crashing. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(*dest);
	*dest = copy;
	return 0;
}
49
/* Description of a single filesystem to mount for the container. */
struct container_mount {
	char *name;		/* Caller-supplied name of the mount */
	char *source;		/* What to mount */
	char *destination;	/* Mount point, relative to the container root */
	char *type;		/* Filesystem type handed to mount */
	char *data;		/* Optional mount data, may be NULL */
	int flags;		/* Mount flags, e.g. MS_BIND */
	int uid;		/* Owner used when the target is created */
	int gid;		/* Group used when the target is created */
	int mode;		/* Mode used when the target is created */
	int mount_in_ns;	/* True if mount should happen in new vfs ns */
	int create;		/* True if target should be created if it doesn't exist */
	int loopback;		/* True if target should be mounted via loopback */
};
64
/* Description of a single device node to create for the container. */
struct container_device {
	char type;		/* 'c' or 'b' for char or block */
	char *path;		/* Node path, relative to the container root */
	int fs_permissions;	/* Permission bits OR-ed into the node's mode */
	int major;
	int minor;
	int copy_minor;		/* Copy the minor from existing node, ignores |minor| */
	int uid;
	int gid;
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
78
/* CPU cgroup parameters stored in a container_config. */
struct container_cpu_cgroup {
	int shares;	/* Setter requires a value of 2 or higher */
	int quota;	/* -1 means no CPU time restriction */
	int period;	/* Setter requires a positive value */
	int rt_runtime;	/* 0 keeps the cgroup from using realtime CPU */
	int rt_period;	/* Setter requires rt_runtime < rt_period */
};
86
Dylan Reid837c74a2016-01-22 17:25:21 -080087/*
88 * Structure that configures how the container is run.
89 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -050090 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -080091 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -070092 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -070093 * premounted_runfs - Path to where the container will be run.
94 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -080095 * program_argv - The program to run and args, e.g. "/sbin/init".
96 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -070097 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -080098 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -070099 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800100 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
101 * alt_syscall_table - Syscall table to use or NULL if none.
102 * mounts - Filesystems to mount in the new namespace.
103 * num_mounts - Number of above.
104 * devices - Device nodes to create.
105 * num_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700106 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800107 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700108 * cgroup_parent - Parent dir for cgroup creation
109 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700110 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700111 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800112 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid837c74a2016-01-22 17:25:21 -0800113 */
114struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500115 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800116 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700117 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700118 char *premounted_runfs;
119 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800120 char **program_argv;
121 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700122 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800123 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700124 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800125 char *gid_map;
126 char *alt_syscall_table;
127 struct container_mount *mounts;
128 size_t num_mounts;
129 struct container_device *devices;
130 size_t num_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700131 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800132 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700133 char *cgroup_parent;
134 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700135 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700136 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800137 int keep_fds_open;
Dylan Reid837c74a2016-01-22 17:25:21 -0800138};
139
140struct container_config *container_config_create()
141{
142 return calloc(1, sizeof(struct container_config));
143}
144
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700145static void container_free_program_args(struct container_config *c)
146{
147 int i;
148
149 if (!c->program_argv)
150 return;
151 for (i = 0; i < c->num_args; ++i) {
152 FREE_AND_NULL(c->program_argv[i]);
153 }
154 FREE_AND_NULL(c->program_argv);
155}
156
157static void container_config_free_mount(struct container_mount *mount)
158{
159 FREE_AND_NULL(mount->name);
160 FREE_AND_NULL(mount->source);
161 FREE_AND_NULL(mount->destination);
162 FREE_AND_NULL(mount->type);
163 FREE_AND_NULL(mount->data);
164}
165
166static void container_config_free_device(struct container_device *device)
167{
168 FREE_AND_NULL(device->path);
169}
170
Dylan Reid837c74a2016-01-22 17:25:21 -0800171void container_config_destroy(struct container_config *c)
172{
173 size_t i;
174
175 if (c == NULL)
176 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700177 FREE_AND_NULL(c->rootfs);
178 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700179 FREE_AND_NULL(c->premounted_runfs);
180 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700181 FREE_AND_NULL(c->uid_map);
182 FREE_AND_NULL(c->gid_map);
183 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800184 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700185 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800186 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700187 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800188 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700189 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800190 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700191 FREE_AND_NULL(c->devices);
192 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700193 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700194 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800195}
196
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500197int container_config_config_root(struct container_config *c,
198 const char *config_root)
199{
200 return strdup_and_free(&c->config_root, config_root);
201}
202
203const char *container_config_get_config_root(const struct container_config *c)
204{
205 return c->config_root;
206}
207
Dylan Reid837c74a2016-01-22 17:25:21 -0800208int container_config_rootfs(struct container_config *c, const char *rootfs)
209{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700210 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800211}
212
Dylan Reid11456722016-05-02 11:24:50 -0700213const char *container_config_get_rootfs(const struct container_config *c)
214{
215 return c->rootfs;
216}
217
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700218void container_config_rootfs_mount_flags(struct container_config *c,
219 unsigned long rootfs_mount_flags)
220{
221 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
222 * simply check against zero later. MS_BIND is also added to avoid
223 * re-mounting the original filesystem, since the rootfs is always
224 * bind-mounted.
225 */
226 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
227}
228
229unsigned long container_config_get_rootfs_mount_flags(
230 const struct container_config *c)
231{
232 return c->rootfs_mount_flags;
233}
234
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700235int container_config_premounted_runfs(struct container_config *c, const char *runfs)
236{
237 return strdup_and_free(&c->premounted_runfs, runfs);
238}
239
240const char *container_config_get_premounted_runfs(const struct container_config *c)
241{
242 return c->premounted_runfs;
243}
244
245int container_config_pid_file(struct container_config *c, const char *path)
246{
247 return strdup_and_free(&c->pid_file_path, path);
248}
249
250const char *container_config_get_pid_file(const struct container_config *c)
251{
252 return c->pid_file_path;
253}
254
Dylan Reid837c74a2016-01-22 17:25:21 -0800255int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800256 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800257{
258 size_t i;
259
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700260 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800261 c->num_args = num_args;
262 c->program_argv = calloc(num_args + 1, sizeof(char *));
263 if (!c->program_argv)
264 return -ENOMEM;
265 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700266 if (strdup_and_free(&c->program_argv[i], argv[i]))
267 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800268 }
269 c->program_argv[num_args] = NULL;
270 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700271
272error_free_return:
273 container_free_program_args(c);
274 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800275}
276
Dylan Reid11456722016-05-02 11:24:50 -0700277size_t container_config_get_num_program_args(const struct container_config *c)
278{
279 return c->num_args;
280}
281
282const char *container_config_get_program_arg(const struct container_config *c,
283 size_t index)
284{
285 if (index >= c->num_args)
286 return NULL;
287 return c->program_argv[index];
288}
289
Dylan Reid1874feb2016-06-22 17:53:50 -0700290void container_config_uid(struct container_config *c, uid_t uid)
291{
292 c->uid = uid;
293}
294
295uid_t container_config_get_uid(const struct container_config *c)
296{
297 return c->uid;
298}
299
Dylan Reid837c74a2016-01-22 17:25:21 -0800300int container_config_uid_map(struct container_config *c, const char *uid_map)
301{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700302 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800303}
304
Dylan Reid1874feb2016-06-22 17:53:50 -0700305void container_config_gid(struct container_config *c, gid_t gid)
306{
307 c->gid = gid;
308}
309
310gid_t container_config_get_gid(const struct container_config *c)
311{
312 return c->gid;
313}
314
Dylan Reid837c74a2016-01-22 17:25:21 -0800315int container_config_gid_map(struct container_config *c, const char *gid_map)
316{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700317 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800318}
319
320int container_config_alt_syscall_table(struct container_config *c,
321 const char *alt_syscall_table)
322{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700323 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800324}
325
326int container_config_add_mount(struct container_config *c,
327 const char *name,
328 const char *source,
329 const char *destination,
330 const char *type,
331 const char *data,
332 int flags,
333 int uid,
334 int gid,
335 int mode,
336 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500337 int create,
338 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800339{
340 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700341 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800342
343 if (name == NULL || source == NULL ||
344 destination == NULL || type == NULL)
345 return -EINVAL;
346
347 mount_ptr = realloc(c->mounts,
348 sizeof(c->mounts[0]) * (c->num_mounts + 1));
349 if (!mount_ptr)
350 return -ENOMEM;
351 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700352 current_mount = &c->mounts[c->num_mounts];
353 memset(current_mount, 0, sizeof(struct container_mount));
354
355 if (strdup_and_free(&current_mount->name, name))
356 goto error_free_return;
357 if (strdup_and_free(&current_mount->source, source))
358 goto error_free_return;
359 if (strdup_and_free(&current_mount->destination, destination))
360 goto error_free_return;
361 if (strdup_and_free(&current_mount->type, type))
362 goto error_free_return;
363 if (data && strdup_and_free(&current_mount->data, data))
364 goto error_free_return;
365 current_mount->flags = flags;
366 current_mount->uid = uid;
367 current_mount->gid = gid;
368 current_mount->mode = mode;
369 current_mount->mount_in_ns = mount_in_ns;
370 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500371 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800372 ++c->num_mounts;
373 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700374
375error_free_return:
376 container_config_free_mount(current_mount);
377 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800378}
379
380int container_config_add_device(struct container_config *c,
381 char type,
382 const char *path,
383 int fs_permissions,
384 int major,
385 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700386 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800387 int uid,
388 int gid,
389 int read_allowed,
390 int write_allowed,
391 int modify_allowed)
392{
393 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700394 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800395
396 if (path == NULL)
397 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700398 /* If using a dynamic minor number, ensure that minor is -1. */
399 if (copy_minor && (minor != -1))
400 return -EINVAL;
401
Dylan Reid837c74a2016-01-22 17:25:21 -0800402 dev_ptr = realloc(c->devices,
403 sizeof(c->devices[0]) * (c->num_devices + 1));
404 if (!dev_ptr)
405 return -ENOMEM;
406 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700407 current_dev = &c->devices[c->num_devices];
408 memset(current_dev, 0, sizeof(struct container_device));
409
410 current_dev->type = type;
411 if (strdup_and_free(&current_dev->path, path))
412 goto error_free_return;
413 current_dev->fs_permissions = fs_permissions;
414 current_dev->major = major;
415 current_dev->minor = minor;
416 current_dev->copy_minor = copy_minor;
417 current_dev->uid = uid;
418 current_dev->gid = gid;
419 current_dev->read_allowed = read_allowed;
420 current_dev->write_allowed = write_allowed;
421 current_dev->modify_allowed = modify_allowed;
Dylan Reid837c74a2016-01-22 17:25:21 -0800422 ++c->num_devices;
423 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700424
425error_free_return:
426 container_config_free_device(current_dev);
427 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800428}
429
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700430int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700431 const char *setfiles_cmd)
432{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700433 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700434}
Dylan Reid837c74a2016-01-22 17:25:21 -0800435
Dylan Reid11456722016-05-02 11:24:50 -0700436const char *container_config_get_run_setfiles(const struct container_config *c)
437{
438 return c->run_setfiles;
439}
440
Chinyue Chenfac909e2016-06-24 14:17:42 +0800441int container_config_set_cpu_shares(struct container_config *c, int shares)
442{
443 /* CPU shares must be 2 or higher. */
444 if (shares < 2)
445 return -EINVAL;
446
447 c->cpu_cgparams.shares = shares;
448 return 0;
449}
450
451int container_config_set_cpu_cfs_params(struct container_config *c,
452 int quota,
453 int period)
454{
455 /*
456 * quota could be set higher than period to utilize more than one CPU.
457 * quota could also be set as -1 to indicate the cgroup does not adhere
458 * to any CPU time restrictions.
459 */
460 if (quota <= 0 && quota != -1)
461 return -EINVAL;
462 if (period <= 0)
463 return -EINVAL;
464
465 c->cpu_cgparams.quota = quota;
466 c->cpu_cgparams.period = period;
467 return 0;
468}
469
470int container_config_set_cpu_rt_params(struct container_config *c,
471 int rt_runtime,
472 int rt_period)
473{
474 /*
475 * rt_runtime could be set as 0 to prevent the cgroup from using
476 * realtime CPU.
477 */
478 if (rt_runtime < 0 || rt_runtime >= rt_period)
479 return -EINVAL;
480
481 c->cpu_cgparams.rt_runtime = rt_runtime;
482 c->cpu_cgparams.rt_period = rt_period;
483 return 0;
484}
485
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800486int container_config_get_cpu_shares(struct container_config *c)
487{
488 return c->cpu_cgparams.shares;
489}
490
491int container_config_get_cpu_quota(struct container_config *c)
492{
493 return c->cpu_cgparams.quota;
494}
495
496int container_config_get_cpu_period(struct container_config *c)
497{
498 return c->cpu_cgparams.period;
499}
500
501int container_config_get_cpu_rt_runtime(struct container_config *c)
502{
503 return c->cpu_cgparams.rt_runtime;
504}
505
506int container_config_get_cpu_rt_period(struct container_config *c)
507{
508 return c->cpu_cgparams.rt_period;
509}
510
Dylan Reid9e724af2016-07-21 09:58:07 -0700511int container_config_set_cgroup_parent(struct container_config *c,
512 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700513 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700514{
515 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700516 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700517 return strdup_and_free(&c->cgroup_parent, parent);
518}
519
520const char *container_config_get_cgroup_parent(struct container_config *c)
521{
522 return c->cgroup_parent;
523}
524
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700525void container_config_share_host_netns(struct container_config *c)
526{
527 c->share_host_netns = 1;
528}
529
530int get_container_config_share_host_netns(struct container_config *c)
531{
532 return c->share_host_netns;
533}
534
Dylan Reidc4335842016-11-11 10:24:52 -0800535void container_config_keep_fds_open(struct container_config *c)
536{
537 c->keep_fds_open = 1;
538}
539
/*
 * Container manipulation
 */

/* Runtime state of a single container. */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *config_root;
	char *runfs;
	char *rundir;		/* Copy of the |rundir| given to container_new() */
	char *runfsroot;	/* Prefix prepended to mount destinations */
	char *pid_file_path;
	char **ext_mounts;	/* Mounts made outside of the minijail */
	size_t num_ext_mounts;
	char **loopdevs;	/* Loop devices attached for loopback mounts */
	size_t num_loopdevs;
	char *name;		/* Copy of the |name| given to container_new() */
};
558
559struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700560 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800561{
562 struct container *c;
563
Dylan Reid837c74a2016-01-22 17:25:21 -0800564 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700565 if (!c)
566 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800567 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700568 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700569 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700570 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800571 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700572 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800573 return c;
574}
575
576void container_destroy(struct container *c)
577{
Dylan Reid684975e2016-05-02 15:44:47 -0700578 if (c->cgroup)
579 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700580 if (c->jail)
581 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500582 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700583 FREE_AND_NULL(c->name);
584 FREE_AND_NULL(c->rundir);
585 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800586}
587
/*
 * Creates the directory |path|, then explicitly applies |mode| with chmod()
 * and hands ownership to |uid|:|gid|.
 *
 * Returns 0 on success or -errno from the first call that fails; later steps
 * are skipped once one fails.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;

	return 0;
}
598
/*
 * Opens |path| read/write, creating it with |mode| if it does not exist, and
 * hands ownership of it to |uid|:|gid|.
 *
 * Returns 0 on success, or -errno from open() or fchown() on failure.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;

	/* Capture the error before close() has a chance to overwrite errno. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);

	return rc;
}
612
613/* Make sure the mount target exists in the new rootfs. Create if needed and
614 * possible.
615 */
616static int setup_mount_destination(const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700617 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800618 const char *dest)
619{
620 int rc;
621 struct stat st_buf;
622
623 rc = stat(dest, &st_buf);
624 if (rc == 0) /* destination exists */
625 return 0;
626
627 /* Try to create the destination. Either make directory or touch a file
628 * depending on the source type.
629 */
Dylan Reid2149be92016-04-28 18:38:57 -0700630 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800631 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
632 return make_dir(dest, mnt->uid, mnt->gid, mnt->mode);
633
634 return touch_file(dest, mnt->uid, mnt->gid, mnt->mode);
635}
636
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700637/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700638static int run_setfiles_command(const struct container *c,
639 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800640 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700641{
642 int rc;
643 int status;
644 int pid;
645 char *context_path;
646
Dylan Reide040c6b2016-05-02 18:49:02 -0700647 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700648 return 0;
649
650 if (asprintf(&context_path, "%s/file_contexts",
651 c->runfsroot) < 0)
652 return -errno;
653
654 pid = fork();
655 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800656 size_t i;
657 size_t arg_index = 0;
658 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700659 const char *env[] = {
660 NULL,
661 };
662
Yusuke Sato91f11f02016-12-02 16:15:13 -0800663 argv[arg_index++] = config->run_setfiles;
664 argv[arg_index++] = "-r";
665 argv[arg_index++] = c->runfsroot;
666 argv[arg_index++] = context_path;
667 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
668 _exit(-E2BIG);
669 for (i = 0; i < num_destinations; ++i) {
670 argv[arg_index++] = destinations[i];
671 }
672 argv[arg_index] = NULL;
673
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700674 execve(argv[0], (char *const*)argv, (char *const*)env);
675
676 /* Command failed to exec if execve returns. */
677 _exit(-errno);
678 }
679 free(context_path);
680 if (pid < 0)
681 return -errno;
682 do {
683 rc = waitpid(pid, &status, 0);
684 } while (rc == -1 && errno == EINTR);
685 if (rc < 0)
686 return -errno;
687 return status;
688}
689
Mike Frysinger412dbd22017-01-06 01:50:34 -0500690/* Find a free loop device and attach it. */
691static int loopdev_setup(char **loopdev_ret, const char *source)
692{
693 int ret = 0;
694 int source_fd = -1;
695 int control_fd = -1;
696 int loop_fd = -1;
697 char *loopdev = NULL;
698
699 source_fd = open(source, O_RDONLY|O_CLOEXEC);
700 if (source_fd < 0)
701 goto error;
702
703 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
704 if (control_fd < 0)
705 goto error;
706
707 while (1) {
708 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
709 if (num < 0)
710 goto error;
711
712 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
713 goto error;
714
715 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
716 if (loop_fd < 0)
717 goto error;
718
719 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
720 break;
721
722 if (errno != EBUSY)
723 goto error;
724
725 /* Clean up resources for the next pass. */
726 free(loopdev);
727 close(loop_fd);
728 }
729
730 *loopdev_ret = loopdev;
731 goto exit;
732
733error:
734 ret = -errno;
735 free(loopdev);
736exit:
737 if (source_fd != -1)
738 close(source_fd);
739 if (control_fd != -1)
740 close(control_fd);
741 if (loop_fd != -1)
742 close(loop_fd);
743 return ret;
744}
745
/*
 * Detach the specified loop device.
 *
 * Returns 0 on success, or -errno if the device cannot be opened or the
 * LOOP_CLR_FD ioctl fails.
 */
static int loopdev_detach(const char *loopdev)
{
	int result = 0;
	int fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);

	if (fd < 0)
		return -errno;

	if (ioctl(fd, LOOP_CLR_FD) < 0)
		result = -errno;

	close(fd);
	return result;
}
767
Dylan Reide040c6b2016-05-02 18:49:02 -0700768/*
769 * Unmounts anything we mounted in this mount namespace in the opposite order
770 * that they were mounted.
771 */
772static int unmount_external_mounts(struct container *c)
773{
774 int ret = 0;
775
776 while (c->num_ext_mounts) {
777 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700778 if (!c->ext_mounts[c->num_ext_mounts])
779 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -0700780 if (umount(c->ext_mounts[c->num_ext_mounts]))
781 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700782 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -0700783 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700784 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -0500785
786 while (c->num_loopdevs) {
787 c->num_loopdevs--;
788 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
789 ret = -errno;
790 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
791 }
792 FREE_AND_NULL(c->loopdevs);
793
Dylan Reide040c6b2016-05-02 18:49:02 -0700794 return ret;
795}
796
/*
 * Match mount_one in minijail: mount one mountpoint with consideration for
 * the MS_BIND/MS_RDONLY flag combination.
 *
 * 'bind' and 'ro' can't both be specified in the original bind mount, so a
 * read-only bind mount is first mounted without MS_RDONLY and then remounted
 * with the requested flags plus MS_REMOUNT.
 *
 * Returns 0 on success or -1 if either mount() call fails.
 */
static int mount_external(const char *src, const char *dest, const char *type,
			  unsigned long flags, const void *data)
{
	const unsigned long ro_bind = MS_BIND | MS_RDONLY;
	const int needs_ro_remount = (flags & ro_bind) == ro_bind;
	unsigned long initial_flags = flags;

	if (needs_ro_remount)
		initial_flags &= ~MS_RDONLY;

	if (mount(src, dest, type, initial_flags, data) == -1)
		return -1;

	if (!needs_ro_remount)
		return 0;

	/* Second pass: apply read-only on top of the existing bind mount. */
	if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1)
		return -1;

	return 0;
}
827
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700828static int do_container_mount(struct container *c,
829 const struct container_mount *mnt)
830{
Mike Frysinger412dbd22017-01-06 01:50:34 -0500831 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700832 char *source = NULL;
833 char *dest = NULL;
834 int rc = 0;
835
836 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
837 return -errno;
838
839 /*
840 * If it's a bind mount relative to rootfs, append source to
841 * rootfs path, otherwise source path is absolute.
842 */
843 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
844 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
845 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500846 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
847 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
848 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700849 } else {
850 if (asprintf(&source, "%s", mnt->source) < 0)
851 goto error_free_return;
852 }
853
854 if (mnt->create) {
855 rc = setup_mount_destination(mnt, source, dest);
856 if (rc)
857 goto error_free_return;
858 }
Mike Frysinger412dbd22017-01-06 01:50:34 -0500859 if (mnt->loopback) {
860 /* Record this loopback file for cleanup later. */
861 loop_source = source;
862 source = NULL;
863 rc = loopdev_setup(&source, loop_source);
864 if (rc)
865 goto error_free_return;
866
867 /* Save this to unmount when shutting down. */
868 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
869 if (rc)
870 goto error_free_return;
871 c->num_loopdevs++;
872 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700873 if (mnt->mount_in_ns) {
874 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -0700875 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
876 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700877 if (rc)
878 goto error_free_return;
879 } else {
880 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +0900881 if (mount_external(source, dest, mnt->type, mnt->flags,
882 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700883 goto error_free_return;
884 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700885 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
886 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700887 goto error_free_return;
888 c->num_ext_mounts++;
889 }
890
891 goto exit;
892
893error_free_return:
894 if (!rc)
895 rc = -errno;
896exit:
Mike Frysinger412dbd22017-01-06 01:50:34 -0500897 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700898 free(source);
899 free(dest);
900 return rc;
901}
902
Dylan Reide040c6b2016-05-02 18:49:02 -0700903static int do_container_mounts(struct container *c,
904 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -0700905{
906 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700907 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -0700908
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700909 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -0700910 /*
911 * Allocate space to track anything we mount in our mount namespace.
912 * This over-allocates as it has space for all mounts.
913 */
914 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
915 if (!c->ext_mounts)
916 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500917 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
918 if (!c->loopdevs)
919 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -0700920
921 for (i = 0; i < config->num_mounts; ++i) {
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700922 rc = do_container_mount(c, &config->mounts[i]);
923 if (rc)
924 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700925 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700926
Dylan Reid7daf9982016-04-28 16:55:42 -0700927 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700928
929error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -0700930 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700931 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -0700932}
933
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700934static int container_create_device(const struct container *c,
935 const struct container_device *dev,
936 int minor)
937{
938 char *path = NULL;
939 int rc = 0;
940 int mode;
941
942 switch (dev->type) {
943 case 'b':
944 mode = S_IFBLK;
945 break;
946 case 'c':
947 mode = S_IFCHR;
948 break;
949 default:
950 return -EINVAL;
951 }
952 mode |= dev->fs_permissions;
953
954 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
955 goto error_free_return;
956 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
957 goto error_free_return;
958 if (chown(path, dev->uid, dev->gid))
959 goto error_free_return;
960 if (chmod(path, dev->fs_permissions))
961 goto error_free_return;
962
963 goto exit;
964
965error_free_return:
966 rc = -errno;
967exit:
968 free(path);
969 return rc;
970}
971
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700972static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -0800973{
Dylan Reidb3621832016-03-24 10:24:57 -0700974 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -0700975 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700976 char *runfs_template = NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800977
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700978 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
979 return -ENOMEM;
980
981 c->runfs = mkdtemp(runfs_template);
982 if (!c->runfs) {
983 free(runfs_template);
984 return -errno;
985 }
986
987 /* Make sure the container uid can access the rootfs. */
988 if (chmod(c->runfs, 0700))
989 return -errno;
990 if (chown(c->runfs, config->uid, config->gid))
991 return -errno;
992
993 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
994 return -errno;
995
996 if (mkdir(c->runfsroot, root_dir_mode))
997 return -errno;
998 if (chmod(c->runfsroot, root_dir_mode))
999 return -errno;
1000
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001001 if (mount(rootfs, c->runfsroot, "", MS_BIND, NULL))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001002 return -errno;
1003
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001004 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1005 * second call to mount() to actually set them.
1006 */
1007 if (config->rootfs_mount_flags &&
1008 mount(rootfs, c->runfsroot, "",
1009 config->rootfs_mount_flags, NULL)) {
1010 return -errno;
1011 }
1012
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001013 return 0;
1014}
1015
/*
 * Translates |id|, an id outside the user namespace, to the id it maps to
 * inside the namespace described by |map|.
 *
 * |map| is a comma-separated list of "<inside> <outside> <length>" entries,
 * each covering the outside ids [outside, outside + length).  Empty entries
 * between commas are skipped, and anything following the length within an
 * entry is ignored.
 *
 * Returns the inside id (>= 0) on success.  On failure returns a negative
 * errno value and leaves errno set to the same error: EINVAL when |map| is
 * NULL or malformed, a field is negative or does not fit in an int, or |id|
 * is not covered by any entry; ERANGE when strtol() overflows.
 */
static int get_userns_id(const char *map, int id)
{
	const char *pos = map;
	int err = EINVAL;

	errno = 0;
	if (!map)
		goto fail;

	for (;;) {
		long field[3];
		long long mapped;
		int inside, outside, length;
		int k;

		/* Skip entry separators, including empty entries. */
		pos += strspn(pos, ",");
		if (*pos == '\0')
			break;

		/* Parse "<inside> <outside> <length>". */
		for (k = 0; k < 3; k++) {
			char *end;

			errno = 0;
			field[k] = strtol(pos, &end, 10);
			if (errno) {
				err = errno;
				goto fail;
			}
			/* Reject missing, negative or int-overflowing fields. */
			if (end == pos || field[k] < 0 ||
			    field[k] != (long)(int)field[k])
				goto fail;
			pos = end;
		}
		inside = (int)field[0];
		outside = (int)field[1];
		length = (int)field[2];

		/*
		 * The range is half-open: outside + length is the first id
		 * NOT covered.  Comparing the offset avoids overflowing
		 * outside + length.
		 */
		if (id >= outside && id - outside < length) {
			mapped = (long long)inside + (id - outside);
			if (mapped != (long long)(int)mapped)
				goto fail;
			return (int)mapped;
		}

		/* Ignore whatever trails the length, up to the next entry. */
		pos += strcspn(pos, ",");
	}

fail:
	/* Callers read errno after a failure, so keep it in sync. */
	errno = err;
	return -err;
}
1053
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001054int container_start(struct container *c, const struct container_config *config)
1055{
1056 int rc = 0;
1057 unsigned int i;
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001058 int uid_userns, gid_userns;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001059 char **destinations;
1060 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001061
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001062 if (!c)
1063 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001064 if (!config)
1065 return -EINVAL;
1066 if (!config->program_argv || !config->program_argv[0])
1067 return -EINVAL;
1068
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001069 if (config->config_root) {
1070 c->config_root = strdup(config->config_root);
1071 if (!c->config_root) {
1072 rc = -ENOMEM;
1073 goto error_rmdir;
1074 }
1075 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001076 if (config->premounted_runfs) {
1077 c->runfs = NULL;
1078 c->runfsroot = strdup(config->premounted_runfs);
1079 if (!c->runfsroot) {
1080 rc = -ENOMEM;
1081 goto error_rmdir;
1082 }
1083 } else {
1084 rc = mount_runfs(c, config);
1085 if (rc)
1086 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001087 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001088
1089 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001090 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001091 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001092
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001093 rc = do_container_mounts(c, config);
1094 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001095 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001096
Dylan Reida9966422016-07-21 10:11:34 -07001097 c->cgroup = container_cgroup_new(c->name,
1098 "/sys/fs/cgroup",
1099 config->cgroup_parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -07001100 config->cgroup_owner,
1101 config->cgroup_group);
Dylan Reida9966422016-07-21 10:11:34 -07001102 if (!c->cgroup)
1103 goto error_rmdir;
1104
Keshav Santhanam268fa032016-07-14 09:59:24 -07001105 /* Must be root to modify device cgroup or mknod */
1106 if (getuid() == 0) {
1107 c->cgroup->ops->deny_all_devices(c->cgroup);
Dylan Reid837c74a2016-01-22 17:25:21 -08001108
Keshav Santhanam268fa032016-07-14 09:59:24 -07001109 for (i = 0; i < config->num_devices; i++) {
1110 const struct container_device *dev = &config->devices[i];
1111 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -08001112
Keshav Santhanam268fa032016-07-14 09:59:24 -07001113 if (dev->copy_minor) {
1114 struct stat st_buff;
1115 if (stat(dev->path, &st_buff) < 0)
1116 continue;
1117 /* Use the minor macro to extract the device number. */
1118 minor = minor(st_buff.st_rdev);
1119 }
1120 if (minor >= 0) {
1121 rc = container_create_device(c, dev, minor);
1122 if (rc)
1123 goto error_rmdir;
1124 }
1125
1126 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
1127 minor, dev->read_allowed,
1128 dev->write_allowed,
1129 dev->modify_allowed, dev->type);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001130 if (rc)
Dylan Reid355d5e42016-04-29 16:53:31 -07001131 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001132 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001133
1134 for (i = 0; i < c->num_loopdevs; ++i) {
1135 struct stat st;
1136
1137 if (stat(c->loopdevs[i], &st) < 0)
1138 goto error_rmdir;
1139 rc = c->cgroup->ops->add_device(c->cgroup, major(st.st_rdev),
1140 minor(st.st_rdev), 1, 0, 0, 'b');
1141 if (rc)
1142 goto error_rmdir;
1143 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001144 }
1145
Dylan Reidd7229582016-04-27 17:08:40 -07001146 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001147 destinations = calloc(config->num_mounts, sizeof(char *));
1148 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001149 for (i = 0; i < config->num_mounts; i++) {
1150 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001151 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001152
1153 if (mnt->mount_in_ns)
1154 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001155 if (mnt->flags & MS_RDONLY)
1156 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001157
Yusuke Satod33db432016-12-05 16:24:37 -08001158 /* A hack to avoid setfiles on /data and /cache. */
1159 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001160 continue;
1161
1162 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1163 size_t j;
1164 for (j = 0; j < num_destinations; ++j) {
1165 free(destinations[j]);
1166 }
1167 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001168 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001169 }
1170
1171 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001172 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001173 if (num_destinations) {
1174 size_t i;
1175 rc = run_setfiles_command(c, config, destinations, num_destinations);
1176 for (i = 0; i < num_destinations; ++i) {
1177 free(destinations[i]);
1178 }
1179 }
1180 free(destinations);
1181 if (rc)
1182 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001183
Chinyue Chenfac909e2016-06-24 14:17:42 +08001184 /* Setup CPU cgroup params. */
1185 if (config->cpu_cgparams.shares) {
1186 rc = c->cgroup->ops->set_cpu_shares(
1187 c->cgroup, config->cpu_cgparams.shares);
1188 if (rc)
1189 goto error_rmdir;
1190 }
1191 if (config->cpu_cgparams.period) {
1192 rc = c->cgroup->ops->set_cpu_quota(
1193 c->cgroup, config->cpu_cgparams.quota);
1194 if (rc)
1195 goto error_rmdir;
1196 rc = c->cgroup->ops->set_cpu_period(
1197 c->cgroup, config->cpu_cgparams.period);
1198 if (rc)
1199 goto error_rmdir;
1200 }
1201 if (config->cpu_cgparams.rt_period) {
1202 rc = c->cgroup->ops->set_cpu_rt_runtime(
1203 c->cgroup, config->cpu_cgparams.rt_runtime);
1204 if (rc)
1205 goto error_rmdir;
1206 rc = c->cgroup->ops->set_cpu_rt_period(
1207 c->cgroup, config->cpu_cgparams.rt_period);
1208 if (rc)
1209 goto error_rmdir;
1210 }
1211
Dylan Reid837c74a2016-01-22 17:25:21 -08001212 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001213 if (config->pid_file_path) {
1214 c->pid_file_path = strdup(config->pid_file_path);
1215 if (!c->pid_file_path) {
1216 rc = -ENOMEM;
1217 goto error_rmdir;
1218 }
1219 } else if (c->runfs) {
1220 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1221 rc = -ENOMEM;
1222 goto error_rmdir;
1223 }
1224 }
1225
1226 if (c->pid_file_path)
1227 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001228 minijail_reset_signal_mask(c->jail);
1229
1230 /* Setup container namespaces. */
1231 minijail_namespace_ipc(c->jail);
1232 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001233 if (!config->share_host_netns)
1234 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001235 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001236 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001237 if (getuid() != 0)
1238 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001239 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001240 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001241 if (rc)
1242 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001243 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001244 if (rc)
1245 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001246
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001247 /* Set the UID/GID inside the container if not 0. */
1248 uid_userns = get_userns_id(config->uid_map, config->uid);
1249 if (uid_userns < 0)
1250 goto error_rmdir;
1251 else if (uid_userns > 0)
1252 minijail_change_uid(c->jail, (uid_t) uid_userns);
1253 gid_userns = get_userns_id(config->gid_map, config->gid);
1254 if (gid_userns < 0)
1255 goto error_rmdir;
1256 else if (gid_userns > 0)
1257 minijail_change_gid(c->jail, (gid_t) gid_userns);
1258
Dylan Reid837c74a2016-01-22 17:25:21 -08001259 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1260 if (rc)
1261 goto error_rmdir;
1262
1263 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001264 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1265 if (c->cgroup->cgroup_tasks_paths[i]) {
1266 rc = minijail_add_to_cgroup(c->jail,
1267 c->cgroup->cgroup_tasks_paths[i]);
1268 if (rc)
1269 goto error_rmdir;
1270 }
1271 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001272
Dylan Reide040c6b2016-05-02 18:49:02 -07001273 if (config->alt_syscall_table)
1274 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001275
1276 minijail_run_as_init(c->jail);
1277
Dylan Reid3da683b2016-04-05 03:35:35 -07001278 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1279 minijail_skip_remount_private(c->jail);
1280
Dylan Reidc4335842016-11-11 10:24:52 -08001281 if (!config->keep_fds_open)
1282 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001283
Dylan Reid837c74a2016-01-22 17:25:21 -08001284 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001285 config->program_argv[0],
1286 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001287 &c->init_pid, NULL, NULL,
1288 NULL);
1289 if (rc)
1290 goto error_rmdir;
1291 return 0;
1292
1293error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001294 if (!rc)
1295 rc = -errno;
1296 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001297 return rc;
1298}
1299
1300const char *container_root(struct container *c)
1301{
1302 return c->runfs;
1303}
1304
1305int container_pid(struct container *c)
1306{
1307 return c->init_pid;
1308}
1309
1310static int container_teardown(struct container *c)
1311{
Dylan Reid837c74a2016-01-22 17:25:21 -08001312 int ret = 0;
1313
Dylan Reide040c6b2016-05-02 18:49:02 -07001314 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001315 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001316 if (umount(c->runfsroot))
1317 ret = -errno;
1318 if (rmdir(c->runfsroot))
1319 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001320 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001321 }
1322 if (c->pid_file_path) {
1323 if (unlink(c->pid_file_path))
1324 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001325 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001326 }
1327 if (c->runfs) {
1328 if (rmdir(c->runfs))
1329 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001330 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001331 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001332 return ret;
1333}
1334
1335int container_wait(struct container *c)
1336{
Dylan Reidcf745c52016-04-22 10:18:03 -07001337 int rc;
1338
1339 do {
1340 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001341 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001342
Luis Hector Chavez945af482016-06-03 08:39:34 -07001343 // If the process had already been reaped, still perform teardown.
1344 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001345 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001346 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001347 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001348}
1349
1350int container_kill(struct container *c)
1351{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001352 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001353 return -errno;
1354 return container_wait(c);
1355}