blob: 14c8b04a3b63e0bbc5a13561030bfbb06b10b882 [file] [log] [blame]
/* Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
10#include <malloc.h>
11#include <signal.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <sys/mount.h>
16#include <sys/stat.h>
17#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070018#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080019#include <unistd.h>
20
21#include "container_cgroup.h"
22#include "libcontainer.h"
23#include "libminijail.h"
24
Luis Hector Chavez945af482016-06-03 08:39:34 -070025static int container_teardown(struct container *c);
26
/*
 * Describes a single filesystem to be mounted for a container.
 */
struct container_mount {
	/* Strings below are heap copies owned by the enclosing config. */
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;
	/* Mount flags; MS_BIND is checked when resolving |source|. */
	int flags;
	/* Owner and mode applied when the mount target has to be created. */
	int uid;
	int gid;
	int mode;
	/* True if mount should happen in new vfs ns. */
	int mount_in_ns;
	/* True if target should be created if it doesn't exist. */
	int create;
};
40
/*
 * Describes a device node to create inside the container and the cgroup
 * access granted to it.
 */
struct container_device {
	/* 'c' or 'b' for char or block. */
	char type;
	/* Heap copy of the node path, owned by the enclosing config. */
	char *path;
	int fs_permissions;
	int major;
	int minor;
	/* Copy the minor from existing node, ignores |minor|. */
	int copy_minor;
	int uid;
	int gid;
	/* Access flags handed to the device cgroup. */
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
54
/*
 * Structure that configures how the container is run.
 */
struct container_config {
	/* Path to the root of the container's filesystem. */
	char *rootfs;
	/* The program to run and args, e.g. "/sbin/init". */
	char **program_argv;
	/* Number of args in program_argv. */
	size_t num_args;
	/* Mapping of UIDs in the container, e.g. "0 100000 1024". */
	char *uid_map;
	/* Mapping of GIDs in the container, e.g. "0 100000 1024". */
	char *gid_map;
	/* Syscall table to use or NULL if none. */
	char *alt_syscall_table;
	/* Filesystems to mount in the new namespace, and how many. */
	struct container_mount *mounts;
	size_t num_mounts;
	/* Device nodes to create, and how many. */
	struct container_device *devices;
	size_t num_devices;
	/* Should run setfiles on mounts to enable selinux. */
	const char *run_setfiles;
};
83
84struct container_config *container_config_create()
85{
86 return calloc(1, sizeof(struct container_config));
87}
88
89void container_config_destroy(struct container_config *c)
90{
91 size_t i;
92
93 if (c == NULL)
94 return;
95 free(c->rootfs);
96 for (i = 0; i < c->num_args; ++i)
97 free(c->program_argv[i]);
98 free(c->program_argv);
99 free(c->uid_map);
100 free(c->gid_map);
101 free(c->alt_syscall_table);
102 for (i = 0; i < c->num_mounts; ++i) {
103 free(c->mounts[i].name);
104 free(c->mounts[i].source);
105 free(c->mounts[i].destination);
106 free(c->mounts[i].type);
107 free(c->mounts[i].data);
108 }
109 free(c->mounts);
110 for (i = 0; i < c->num_devices; ++i) {
111 free(c->devices[i].path);
112 }
113 free(c->devices);
114 free(c);
115}
116
117int container_config_rootfs(struct container_config *c, const char *rootfs)
118{
119 c->rootfs = strdup(rootfs);
120 if (!c->rootfs)
121 return -ENOMEM;
122 return 0;
123}
124
Dylan Reid11456722016-05-02 11:24:50 -0700125const char *container_config_get_rootfs(const struct container_config *c)
126{
127 return c->rootfs;
128}
129
Dylan Reid837c74a2016-01-22 17:25:21 -0800130int container_config_program_argv(struct container_config *c,
131 char **argv, size_t num_args)
132{
133 size_t i;
134
135 c->num_args = num_args;
136 c->program_argv = calloc(num_args + 1, sizeof(char *));
137 if (!c->program_argv)
138 return -ENOMEM;
139 for (i = 0; i < num_args; ++i) {
140 c->program_argv[i] = strdup(argv[i]);
141 if (!c->program_argv[i])
142 return -ENOMEM;
143 }
144 c->program_argv[num_args] = NULL;
145 return 0;
146}
147
Dylan Reid11456722016-05-02 11:24:50 -0700148size_t container_config_get_num_program_args(const struct container_config *c)
149{
150 return c->num_args;
151}
152
153const char *container_config_get_program_arg(const struct container_config *c,
154 size_t index)
155{
156 if (index >= c->num_args)
157 return NULL;
158 return c->program_argv[index];
159}
160
Dylan Reid837c74a2016-01-22 17:25:21 -0800161int container_config_uid_map(struct container_config *c, const char *uid_map)
162{
163 c->uid_map = strdup(uid_map);
164 if (!c->uid_map)
165 return -ENOMEM;
166 return 0;
167}
168
169int container_config_gid_map(struct container_config *c, const char *gid_map)
170{
171 c->gid_map = strdup(gid_map);
172 if (!c->gid_map)
173 return -ENOMEM;
174 return 0;
175}
176
177int container_config_alt_syscall_table(struct container_config *c,
178 const char *alt_syscall_table)
179{
180 c->alt_syscall_table = strdup(alt_syscall_table);
181 if (!c->alt_syscall_table)
182 return -ENOMEM;
183 return 0;
184}
185
186int container_config_add_mount(struct container_config *c,
187 const char *name,
188 const char *source,
189 const char *destination,
190 const char *type,
191 const char *data,
192 int flags,
193 int uid,
194 int gid,
195 int mode,
196 int mount_in_ns,
197 int create)
198{
199 struct container_mount *mount_ptr;
200
201 if (name == NULL || source == NULL ||
202 destination == NULL || type == NULL)
203 return -EINVAL;
204
205 mount_ptr = realloc(c->mounts,
206 sizeof(c->mounts[0]) * (c->num_mounts + 1));
207 if (!mount_ptr)
208 return -ENOMEM;
209 c->mounts = mount_ptr;
210 c->mounts[c->num_mounts].name = strdup(name);
211 if (!c->mounts[c->num_mounts].name)
212 return -ENOMEM;
213 c->mounts[c->num_mounts].source = strdup(source);
214 if (!c->mounts[c->num_mounts].source)
215 return -ENOMEM;
216 c->mounts[c->num_mounts].destination = strdup(destination);
217 if (!c->mounts[c->num_mounts].destination)
218 return -ENOMEM;
219 c->mounts[c->num_mounts].type = strdup(type);
220 if (!c->mounts[c->num_mounts].type)
221 return -ENOMEM;
222 if (data) {
223 c->mounts[c->num_mounts].data = strdup(data);
224 if (!c->mounts[c->num_mounts].data)
225 return -ENOMEM;
226 } else {
227 c->mounts[c->num_mounts].data = NULL;
228 }
229 c->mounts[c->num_mounts].flags = flags;
230 c->mounts[c->num_mounts].uid = uid;
231 c->mounts[c->num_mounts].gid = gid;
232 c->mounts[c->num_mounts].mode = mode;
233 c->mounts[c->num_mounts].mount_in_ns = mount_in_ns;
234 c->mounts[c->num_mounts].create = create;
235 ++c->num_mounts;
236 return 0;
237}
238
239int container_config_add_device(struct container_config *c,
240 char type,
241 const char *path,
242 int fs_permissions,
243 int major,
244 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700245 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800246 int uid,
247 int gid,
248 int read_allowed,
249 int write_allowed,
250 int modify_allowed)
251{
252 struct container_device *dev_ptr;
253
254 if (path == NULL)
255 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700256 /* If using a dynamic minor number, ensure that minor is -1. */
257 if (copy_minor && (minor != -1))
258 return -EINVAL;
259
Dylan Reid837c74a2016-01-22 17:25:21 -0800260 dev_ptr = realloc(c->devices,
261 sizeof(c->devices[0]) * (c->num_devices + 1));
262 if (!dev_ptr)
263 return -ENOMEM;
264 c->devices = dev_ptr;
265 c->devices[c->num_devices].type = type;
266 c->devices[c->num_devices].path = strdup(path);
267 if (!c->devices[c->num_devices].path)
268 return -ENOMEM;
269 c->devices[c->num_devices].fs_permissions = fs_permissions;
270 c->devices[c->num_devices].major = major;
271 c->devices[c->num_devices].minor = minor;
Dylan Reid355d5e42016-04-29 16:53:31 -0700272 c->devices[c->num_devices].copy_minor = copy_minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800273 c->devices[c->num_devices].uid = uid;
274 c->devices[c->num_devices].gid = gid;
275 c->devices[c->num_devices].read_allowed = read_allowed;
276 c->devices[c->num_devices].write_allowed = write_allowed;
277 c->devices[c->num_devices].modify_allowed = modify_allowed;
278 ++c->num_devices;
279 return 0;
280}
281
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700282void container_config_run_setfiles(struct container_config *c,
283 const char *setfiles_cmd)
284{
285 c->run_setfiles = setfiles_cmd;
286}
Dylan Reid837c74a2016-01-22 17:25:21 -0800287
Dylan Reid11456722016-05-02 11:24:50 -0700288const char *container_config_get_run_setfiles(const struct container_config *c)
289{
290 return c->run_setfiles;
291}
292
Dylan Reid837c74a2016-01-22 17:25:21 -0800293/*
294 * Container manipulation
295 */
/*
 * Runtime state of a single container.
 */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	/* Pid of the program started by container_start(). */
	pid_t init_pid;
	/* Temporary directory created under |rundir| for this run. */
	char *runfs;
	char *rundir;
	/* "<runfs>/root", where the rootfs is bind mounted. */
	char *runfsroot;
	char *pid_file_path;
	/* Mounts made outside of the minijail. */
	char **ext_mounts;
	size_t num_ext_mounts;
	char *name;
};
308
309struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700310 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800311{
312 struct container *c;
313
Dylan Reid837c74a2016-01-22 17:25:21 -0800314 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700315 if (!c)
316 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800317 c->cgroup = container_cgroup_new(name, "/sys/fs/cgroup");
318 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700319 c->name = strdup(name);
320 if (!c->cgroup || !c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700321 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800322 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700323 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800324 return c;
325}
326
327void container_destroy(struct container *c)
328{
Dylan Reid684975e2016-05-02 15:44:47 -0700329 if (c->cgroup)
330 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700331 if (c->jail)
332 minijail_destroy(c->jail);
333 free(c->name);
Dylan Reid837c74a2016-01-22 17:25:21 -0800334 free(c->rundir);
335 free(c);
336}
337
/*
 * Creates the directory |path|, then applies |mode| and the |uid|/|gid|
 * owner to it.
 *
 * Returns 0 on success or -errno from the first step that fails.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	int failed;

	failed = mkdir(path, mode);
	/* chmod again so the final mode does not depend on the umask. */
	if (!failed)
		failed = chmod(path, mode);
	if (!failed)
		failed = chown(path, uid, gid);

	return failed ? -errno : 0;
}
348
/*
 * Creates the file |path| if it doesn't exist and sets its owner to
 * |uid|/|gid|.  |mode| is only used when the file is created.
 *
 * Returns 0 on success or -errno from open() or fchown().
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Record the fchown error before close() can overwrite errno. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);
	return rc;
}
362
363/* Make sure the mount target exists in the new rootfs. Create if needed and
364 * possible.
365 */
366static int setup_mount_destination(const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700367 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800368 const char *dest)
369{
370 int rc;
371 struct stat st_buf;
372
373 rc = stat(dest, &st_buf);
374 if (rc == 0) /* destination exists */
375 return 0;
376
377 /* Try to create the destination. Either make directory or touch a file
378 * depending on the source type.
379 */
Dylan Reid2149be92016-04-28 18:38:57 -0700380 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800381 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
382 return make_dir(dest, mnt->uid, mnt->gid, mnt->mode);
383
384 return touch_file(dest, mnt->uid, mnt->gid, mnt->mode);
385}
386
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700387/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700388static int run_setfiles_command(const struct container *c,
389 const struct container_config *config,
390 const char *dest)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700391{
392 int rc;
393 int status;
394 int pid;
395 char *context_path;
396
Dylan Reide040c6b2016-05-02 18:49:02 -0700397 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700398 return 0;
399
Dylan Reidb3621832016-03-24 10:24:57 -0700400 /* Really gross hack to avoid setfiles on /data, this should be removed
401 * when data isn't under /home/chronos/user where we can't access it as
402 * the android user.
403 * TODO(b/28705740) - Fix permission to the data directory.
404 */
405 if (strlen(dest) >= 5 && !strcmp(&dest[strlen(dest) - 5], "/data"))
406 return 0;
407
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700408 if (asprintf(&context_path, "%s/file_contexts",
409 c->runfsroot) < 0)
410 return -errno;
411
412 pid = fork();
413 if (pid == 0) {
414 const char *argv[] = {
Dylan Reide040c6b2016-05-02 18:49:02 -0700415 config->run_setfiles,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700416 "-r",
417 c->runfsroot,
418 context_path,
419 dest,
420 NULL,
421 };
422 const char *env[] = {
423 NULL,
424 };
425
426 execve(argv[0], (char *const*)argv, (char *const*)env);
427
428 /* Command failed to exec if execve returns. */
429 _exit(-errno);
430 }
431 free(context_path);
432 if (pid < 0)
433 return -errno;
434 do {
435 rc = waitpid(pid, &status, 0);
436 } while (rc == -1 && errno == EINTR);
437 if (rc < 0)
438 return -errno;
439 return status;
440}
441
Dylan Reide040c6b2016-05-02 18:49:02 -0700442/*
443 * Unmounts anything we mounted in this mount namespace in the opposite order
444 * that they were mounted.
445 */
446static int unmount_external_mounts(struct container *c)
447{
448 int ret = 0;
449
450 while (c->num_ext_mounts) {
451 c->num_ext_mounts--;
452 if (umount(c->ext_mounts[c->num_ext_mounts]))
453 ret = -errno;
454 free(c->ext_mounts[c->num_ext_mounts]);
455 }
456 free(c->ext_mounts);
457 return ret;
458}
459
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700460static int do_container_mount(struct container *c,
461 const struct container_mount *mnt)
462{
463 char *source = NULL;
464 char *dest = NULL;
465 int rc = 0;
466
467 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
468 return -errno;
469
470 /*
471 * If it's a bind mount relative to rootfs, append source to
472 * rootfs path, otherwise source path is absolute.
473 */
474 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
475 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
476 goto error_free_return;
477 } else {
478 if (asprintf(&source, "%s", mnt->source) < 0)
479 goto error_free_return;
480 }
481
482 if (mnt->create) {
483 rc = setup_mount_destination(mnt, source, dest);
484 if (rc)
485 goto error_free_return;
486 }
487 if (mnt->mount_in_ns) {
488 /* We can mount this with minijail. */
489 rc = minijail_mount(c->jail, source, mnt->destination,
490 mnt->type, mnt->flags);
491 if (rc)
492 goto error_free_return;
493 } else {
494 /* Mount this externally and unmount it on exit. */
495 if (mount(source, dest, mnt->type, mnt->flags,
496 mnt->data))
497 goto error_free_return;
498 /* Save this to unmount when shutting down. */
499 c->ext_mounts[c->num_ext_mounts] = strdup(dest);
500 if (!c->ext_mounts[c->num_ext_mounts])
501 goto error_free_return;
502 c->num_ext_mounts++;
503 }
504
505 goto exit;
506
507error_free_return:
508 if (!rc)
509 rc = -errno;
510exit:
511 free(source);
512 free(dest);
513 return rc;
514}
515
Dylan Reide040c6b2016-05-02 18:49:02 -0700516static int do_container_mounts(struct container *c,
517 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -0700518{
519 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700520 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -0700521
Dylan Reide040c6b2016-05-02 18:49:02 -0700522 /*
523 * Allocate space to track anything we mount in our mount namespace.
524 * This over-allocates as it has space for all mounts.
525 */
526 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
527 if (!c->ext_mounts)
528 return -errno;
529
530 for (i = 0; i < config->num_mounts; ++i) {
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700531 rc = do_container_mount(c, &config->mounts[i]);
532 if (rc)
533 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700534 }
535 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700536
537error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -0700538 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700539 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -0700540}
541
Dylan Reide040c6b2016-05-02 18:49:02 -0700542int container_start(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -0800543{
Dylan Reidb3621832016-03-24 10:24:57 -0700544 static const mode_t root_dir_mode = 0660;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700545 int rc = 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800546 unsigned int i;
Dylan Reide040c6b2016-05-02 18:49:02 -0700547 const char *rootfs = config->rootfs;
Dylan Reid837c74a2016-01-22 17:25:21 -0800548 char *runfs_template;
549
Dylan Reide040c6b2016-05-02 18:49:02 -0700550 if (!config)
551 return -EINVAL;
552 if (!config->program_argv || !config->program_argv[0])
553 return -EINVAL;
554
Dylan Reid837c74a2016-01-22 17:25:21 -0800555 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
556 return -errno;
557
558 c->runfs = mkdtemp(runfs_template);
559 if (!c->runfs) {
560 free(runfs_template);
561 return -errno;
562 }
Dylan Reidb3621832016-03-24 10:24:57 -0700563 /* Make sure the container uid can access the rootfs. */
Luis Hector Chavez945af482016-06-03 08:39:34 -0700564 if (chmod(c->runfs, 0755))
Dylan Reidb3621832016-03-24 10:24:57 -0700565 goto error_rmdir;
566
Luis Hector Chavez945af482016-06-03 08:39:34 -0700567 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
Dylan Reid837c74a2016-01-22 17:25:21 -0800568 goto error_rmdir;
569
Luis Hector Chavez945af482016-06-03 08:39:34 -0700570 if (mkdir(c->runfsroot, root_dir_mode))
571 goto error_rmdir;
572 if (chmod(c->runfsroot, root_dir_mode))
573 goto error_rmdir;
574
575 if (mount(rootfs, c->runfsroot, "", MS_BIND | MS_RDONLY, NULL))
Dylan Reid837c74a2016-01-22 17:25:21 -0800576 goto error_rmdir;
577
578 c->jail = minijail_new();
Luis Hector Chavez945af482016-06-03 08:39:34 -0700579 if (!c->jail) {
580 rc = -ENOMEM;
581 goto error_rmdir;
582 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800583
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700584 rc = do_container_mounts(c, config);
585 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -0700586 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800587
588 c->cgroup->ops->deny_all_devices(c->cgroup);
589
Dylan Reide040c6b2016-05-02 18:49:02 -0700590 for (i = 0; i < config->num_devices; i++) {
591 const struct container_device *dev = &config->devices[i];
Dylan Reid837c74a2016-01-22 17:25:21 -0800592 int mode;
Dylan Reid355d5e42016-04-29 16:53:31 -0700593 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800594
595 switch (dev->type) {
596 case 'b':
597 mode = S_IFBLK;
598 break;
599 case 'c':
600 mode = S_IFCHR;
601 break;
602 default:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700603 rc = -EINVAL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800604 goto error_rmdir;
605 }
606 mode |= dev->fs_permissions;
607
Dylan Reid355d5e42016-04-29 16:53:31 -0700608 if (dev->copy_minor) {
609 struct stat st_buff;
610 if (stat(dev->path, &st_buff) < 0)
611 goto error_rmdir;
612 /* Use the minor macro to extract the device number. */
613 minor = minor(st_buff.st_rdev);
614 }
615 if (minor >= 0) {
616 char *path;
617
618 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
619 goto error_rmdir;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700620 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST) {
Dylan Reid837c74a2016-01-22 17:25:21 -0800621 free(path);
622 goto error_rmdir;
623 }
Luis Hector Chavez945af482016-06-03 08:39:34 -0700624 if (chown(path, dev->uid, dev->gid)) {
Dylan Reid837c74a2016-01-22 17:25:21 -0800625 free(path);
626 goto error_rmdir;
627 }
Luis Hector Chavez945af482016-06-03 08:39:34 -0700628 if (chmod(path, dev->fs_permissions)) {
629 free(path);
630 goto error_rmdir;
631 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800632 free(path);
Dylan Reid837c74a2016-01-22 17:25:21 -0800633 }
634
635 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
Dylan Reid355d5e42016-04-29 16:53:31 -0700636 minor, dev->read_allowed,
Dylan Reid837c74a2016-01-22 17:25:21 -0800637 dev->write_allowed,
638 dev->modify_allowed, dev->type);
639 if (rc)
640 goto error_rmdir;
641 }
642
Dylan Reidd7229582016-04-27 17:08:40 -0700643 /* Potentailly run setfiles on mounts configured outside of the jail */
Dylan Reide040c6b2016-05-02 18:49:02 -0700644 for (i = 0; i < config->num_mounts; i++) {
645 const struct container_mount *mnt = &config->mounts[i];
Dylan Reidd7229582016-04-27 17:08:40 -0700646 char *dest;
647
648 if (mnt->mount_in_ns)
649 continue;
650 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
651 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700652 rc = run_setfiles_command(c, config, dest);
Dylan Reidd7229582016-04-27 17:08:40 -0700653 free(dest);
654 if (rc)
655 goto error_rmdir;
656 }
657
Dylan Reid837c74a2016-01-22 17:25:21 -0800658 /* Setup and start the container with libminijail. */
659 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0)
660 goto error_rmdir;
661 minijail_write_pid_file(c->jail, c->pid_file_path);
662 minijail_reset_signal_mask(c->jail);
663
664 /* Setup container namespaces. */
665 minijail_namespace_ipc(c->jail);
666 minijail_namespace_vfs(c->jail);
667 minijail_namespace_net(c->jail);
668 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -0800669 minijail_namespace_user(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -0700670 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800671 if (rc)
672 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700673 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800674 if (rc)
675 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800676
677 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
678 if (rc)
679 goto error_rmdir;
680
681 /* Add the cgroups configured above. */
682 rc = minijail_add_to_cgroup(c->jail, cgroup_cpu_tasks_path(c->cgroup));
683 if (rc)
684 goto error_rmdir;
685 rc = minijail_add_to_cgroup(c->jail,
686 cgroup_cpuacct_tasks_path(c->cgroup));
687 if (rc)
688 goto error_rmdir;
689 rc = minijail_add_to_cgroup(c->jail,
690 cgroup_devices_tasks_path(c->cgroup));
691 if (rc)
692 goto error_rmdir;
693 rc = minijail_add_to_cgroup(c->jail,
694 cgroup_freezer_tasks_path(c->cgroup));
695 if (rc)
696 goto error_rmdir;
697
Dylan Reide040c6b2016-05-02 18:49:02 -0700698 if (config->alt_syscall_table)
699 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800700
701 minijail_run_as_init(c->jail);
702
Dylan Reid3da683b2016-04-05 03:35:35 -0700703 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
704 minijail_skip_remount_private(c->jail);
705
Dylan Reid837c74a2016-01-22 17:25:21 -0800706 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -0700707 config->program_argv[0],
708 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -0800709 &c->init_pid, NULL, NULL,
710 NULL);
711 if (rc)
712 goto error_rmdir;
713 return 0;
714
715error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700716 if (!rc)
717 rc = -errno;
718 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800719 return rc;
720}
721
722const char *container_root(struct container *c)
723{
724 return c->runfs;
725}
726
727int container_pid(struct container *c)
728{
729 return c->init_pid;
730}
731
732static int container_teardown(struct container *c)
733{
Dylan Reid837c74a2016-01-22 17:25:21 -0800734 int ret = 0;
735
Dylan Reide040c6b2016-05-02 18:49:02 -0700736 unmount_external_mounts(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700737 if (c->runfsroot) {
738 if (umount(c->runfsroot))
739 ret = -errno;
740 if (rmdir(c->runfsroot))
741 ret = -errno;
742 free(c->runfsroot);
743 c->runfsroot = NULL;
744 }
745 if (c->pid_file_path) {
746 if (unlink(c->pid_file_path))
747 ret = -errno;
748 free(c->pid_file_path);
749 c->pid_file_path = NULL;
750 }
751 if (c->runfs) {
752 if (rmdir(c->runfs))
753 ret = -errno;
754 free(c->runfs);
755 c->runfs = NULL;
756 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800757 return ret;
758}
759
760int container_wait(struct container *c)
761{
Dylan Reidcf745c52016-04-22 10:18:03 -0700762 int rc;
763
764 do {
765 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -0700766 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -0700767
Luis Hector Chavez945af482016-06-03 08:39:34 -0700768 // If the process had already been reaped, still perform teardown.
769 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -0700770 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700771 }
Dylan Reidcf745c52016-04-22 10:18:03 -0700772 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -0800773}
774
775int container_kill(struct container *c)
776{
Luis Hector Chavez945af482016-06-03 08:39:34 -0700777 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -0800778 return -errno;
779 return container_wait(c);
780}