blob: 9c5fb008c3046e27a77643e15ee0096febc58ec0 [file] [log] [blame]
Dylan Reid837c74a2016-01-22 17:25:21 -08001/* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
10#include <malloc.h>
11#include <signal.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <sys/mount.h>
16#include <sys/stat.h>
17#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070018#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080019#include <unistd.h>
20
21#include "container_cgroup.h"
22#include "libcontainer.h"
23#include "libminijail.h"
24
Luis Hector Chavez945af482016-06-03 08:39:34 -070025static int container_teardown(struct container *c);
26
/*
 * One filesystem mount to set up for a container.
 *
 * source, destination, type, flags and data are the values handed to the
 * mount call; uid, gid and mode are applied to the destination when it has
 * to be created.
 */
struct container_mount {
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;
	int flags;
	int uid;
	int gid;
	int mode;
	/* Non-zero if the mount should happen in the new vfs namespace. */
	int mount_in_ns;
	/* Non-zero if the target should be created when it doesn't exist. */
	int create;
};
40
/*
 * One device node to create inside the container, together with the access
 * flags registered for it in the container's cgroup.
 */
struct container_device {
	/* 'c' for a character device, 'b' for a block device. */
	char type;
	char *path;
	int fs_permissions;
	int major;
	int minor;
	/* Non-zero: take the minor from the existing node, ignoring |minor|. */
	int copy_minor;
	int uid;
	int gid;
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
54
/*
 * Configuration describing how a container is to be run.
 *
 * rootfs            - Path to the root of the container's filesystem.
 * program_argv      - NULL-terminated vector holding the program to run and
 *                     its arguments, e.g. "/sbin/init".
 * num_args          - Number of entries in program_argv (terminator excluded).
 * uid_map           - Mapping of UIDs in the container, e.g. "0 100000 1024".
 * gid_map           - Mapping of GIDs in the container, e.g. "0 100000 1024".
 * alt_syscall_table - Syscall table to use, or NULL if none.
 * mounts            - Filesystems to mount for the container.
 * num_mounts        - Number of entries in mounts.
 * devices           - Device nodes to create.
 * num_devices       - Number of entries in devices.
 * run_setfiles      - Command to run on mounts to enable selinux, or NULL to
 *                     skip that step. Stored by reference, never freed here.
 */
struct container_config {
	char *rootfs;
	char **program_argv;
	size_t num_args;
	char *uid_map;
	char *gid_map;
	char *alt_syscall_table;
	struct container_mount *mounts;
	size_t num_mounts;
	struct container_device *devices;
	size_t num_devices;
	const char *run_setfiles;
};
83
84struct container_config *container_config_create()
85{
86 return calloc(1, sizeof(struct container_config));
87}
88
89void container_config_destroy(struct container_config *c)
90{
91 size_t i;
92
93 if (c == NULL)
94 return;
95 free(c->rootfs);
96 for (i = 0; i < c->num_args; ++i)
97 free(c->program_argv[i]);
98 free(c->program_argv);
99 free(c->uid_map);
100 free(c->gid_map);
101 free(c->alt_syscall_table);
102 for (i = 0; i < c->num_mounts; ++i) {
103 free(c->mounts[i].name);
104 free(c->mounts[i].source);
105 free(c->mounts[i].destination);
106 free(c->mounts[i].type);
107 free(c->mounts[i].data);
108 }
109 free(c->mounts);
110 for (i = 0; i < c->num_devices; ++i) {
111 free(c->devices[i].path);
112 }
113 free(c->devices);
114 free(c);
115}
116
117int container_config_rootfs(struct container_config *c, const char *rootfs)
118{
119 c->rootfs = strdup(rootfs);
120 if (!c->rootfs)
121 return -ENOMEM;
122 return 0;
123}
124
Dylan Reid11456722016-05-02 11:24:50 -0700125const char *container_config_get_rootfs(const struct container_config *c)
126{
127 return c->rootfs;
128}
129
Dylan Reid837c74a2016-01-22 17:25:21 -0800130int container_config_program_argv(struct container_config *c,
131 char **argv, size_t num_args)
132{
133 size_t i;
134
135 c->num_args = num_args;
136 c->program_argv = calloc(num_args + 1, sizeof(char *));
137 if (!c->program_argv)
138 return -ENOMEM;
139 for (i = 0; i < num_args; ++i) {
140 c->program_argv[i] = strdup(argv[i]);
141 if (!c->program_argv[i])
142 return -ENOMEM;
143 }
144 c->program_argv[num_args] = NULL;
145 return 0;
146}
147
Dylan Reid11456722016-05-02 11:24:50 -0700148size_t container_config_get_num_program_args(const struct container_config *c)
149{
150 return c->num_args;
151}
152
153const char *container_config_get_program_arg(const struct container_config *c,
154 size_t index)
155{
156 if (index >= c->num_args)
157 return NULL;
158 return c->program_argv[index];
159}
160
Dylan Reid837c74a2016-01-22 17:25:21 -0800161int container_config_uid_map(struct container_config *c, const char *uid_map)
162{
163 c->uid_map = strdup(uid_map);
164 if (!c->uid_map)
165 return -ENOMEM;
166 return 0;
167}
168
169int container_config_gid_map(struct container_config *c, const char *gid_map)
170{
171 c->gid_map = strdup(gid_map);
172 if (!c->gid_map)
173 return -ENOMEM;
174 return 0;
175}
176
177int container_config_alt_syscall_table(struct container_config *c,
178 const char *alt_syscall_table)
179{
180 c->alt_syscall_table = strdup(alt_syscall_table);
181 if (!c->alt_syscall_table)
182 return -ENOMEM;
183 return 0;
184}
185
186int container_config_add_mount(struct container_config *c,
187 const char *name,
188 const char *source,
189 const char *destination,
190 const char *type,
191 const char *data,
192 int flags,
193 int uid,
194 int gid,
195 int mode,
196 int mount_in_ns,
197 int create)
198{
199 struct container_mount *mount_ptr;
200
201 if (name == NULL || source == NULL ||
202 destination == NULL || type == NULL)
203 return -EINVAL;
204
205 mount_ptr = realloc(c->mounts,
206 sizeof(c->mounts[0]) * (c->num_mounts + 1));
207 if (!mount_ptr)
208 return -ENOMEM;
209 c->mounts = mount_ptr;
210 c->mounts[c->num_mounts].name = strdup(name);
211 if (!c->mounts[c->num_mounts].name)
212 return -ENOMEM;
213 c->mounts[c->num_mounts].source = strdup(source);
214 if (!c->mounts[c->num_mounts].source)
215 return -ENOMEM;
216 c->mounts[c->num_mounts].destination = strdup(destination);
217 if (!c->mounts[c->num_mounts].destination)
218 return -ENOMEM;
219 c->mounts[c->num_mounts].type = strdup(type);
220 if (!c->mounts[c->num_mounts].type)
221 return -ENOMEM;
222 if (data) {
223 c->mounts[c->num_mounts].data = strdup(data);
224 if (!c->mounts[c->num_mounts].data)
225 return -ENOMEM;
226 } else {
227 c->mounts[c->num_mounts].data = NULL;
228 }
229 c->mounts[c->num_mounts].flags = flags;
230 c->mounts[c->num_mounts].uid = uid;
231 c->mounts[c->num_mounts].gid = gid;
232 c->mounts[c->num_mounts].mode = mode;
233 c->mounts[c->num_mounts].mount_in_ns = mount_in_ns;
234 c->mounts[c->num_mounts].create = create;
235 ++c->num_mounts;
236 return 0;
237}
238
239int container_config_add_device(struct container_config *c,
240 char type,
241 const char *path,
242 int fs_permissions,
243 int major,
244 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700245 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800246 int uid,
247 int gid,
248 int read_allowed,
249 int write_allowed,
250 int modify_allowed)
251{
252 struct container_device *dev_ptr;
253
254 if (path == NULL)
255 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700256 /* If using a dynamic minor number, ensure that minor is -1. */
257 if (copy_minor && (minor != -1))
258 return -EINVAL;
259
Dylan Reid837c74a2016-01-22 17:25:21 -0800260 dev_ptr = realloc(c->devices,
261 sizeof(c->devices[0]) * (c->num_devices + 1));
262 if (!dev_ptr)
263 return -ENOMEM;
264 c->devices = dev_ptr;
265 c->devices[c->num_devices].type = type;
266 c->devices[c->num_devices].path = strdup(path);
267 if (!c->devices[c->num_devices].path)
268 return -ENOMEM;
269 c->devices[c->num_devices].fs_permissions = fs_permissions;
270 c->devices[c->num_devices].major = major;
271 c->devices[c->num_devices].minor = minor;
Dylan Reid355d5e42016-04-29 16:53:31 -0700272 c->devices[c->num_devices].copy_minor = copy_minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800273 c->devices[c->num_devices].uid = uid;
274 c->devices[c->num_devices].gid = gid;
275 c->devices[c->num_devices].read_allowed = read_allowed;
276 c->devices[c->num_devices].write_allowed = write_allowed;
277 c->devices[c->num_devices].modify_allowed = modify_allowed;
278 ++c->num_devices;
279 return 0;
280}
281
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700282void container_config_run_setfiles(struct container_config *c,
283 const char *setfiles_cmd)
284{
285 c->run_setfiles = setfiles_cmd;
286}
Dylan Reid837c74a2016-01-22 17:25:21 -0800287
Dylan Reid11456722016-05-02 11:24:50 -0700288const char *container_config_get_run_setfiles(const struct container_config *c)
289{
290 return c->run_setfiles;
291}
292
Dylan Reid837c74a2016-01-22 17:25:21 -0800293/*
294 * Container manipulation
295 */
/* Runtime state of a single container. */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	/* Pid of the program launched by container_start(). */
	pid_t init_pid;
	/* Temporary run directory created under rundir. */
	char *runfs;
	char *rundir;
	/* "<runfs>/root", where the rootfs is bind mounted. */
	char *runfsroot;
	char *pid_file_path;
	/* Mounts made outside of the minijail. */
	char **ext_mounts;
	size_t num_ext_mounts;
	char *name;
};
308
309struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700310 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800311{
312 struct container *c;
313
Dylan Reid837c74a2016-01-22 17:25:21 -0800314 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700315 if (!c)
316 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800317 c->cgroup = container_cgroup_new(name, "/sys/fs/cgroup");
318 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700319 c->name = strdup(name);
320 if (!c->cgroup || !c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700321 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800322 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700323 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800324 return c;
325}
326
327void container_destroy(struct container *c)
328{
Dylan Reid684975e2016-05-02 15:44:47 -0700329 if (c->cgroup)
330 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700331 if (c->jail)
332 minijail_destroy(c->jail);
333 free(c->name);
Dylan Reid837c74a2016-01-22 17:25:21 -0800334 free(c->rundir);
335 free(c);
336}
337
/*
 * Creates the directory |path|, then sets its mode and owner.
 *
 * The mode is applied a second time with chmod() after mkdir(); presumably
 * this is to get the exact bits regardless of the process umask.
 *
 * Returns 0 on success or a negative errno from the first call that failed.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	int failed;

	failed = mkdir(path, mode);
	if (!failed)
		failed = chmod(path, mode);
	if (!failed)
		failed = chown(path, uid, gid);

	return failed ? -errno : 0;
}
348
/*
 * Opens |path| read/write, creating it with |mode| if it does not exist, and
 * sets its owner to |uid|:|gid|.
 *
 * Returns 0 on success or a negative errno from open() or fchown().
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int saved_errno = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Record the fchown() error now; close() may overwrite errno. */
	if (fchown(fd, uid, gid))
		saved_errno = errno;
	close(fd);

	return -saved_errno;
}
362
363/* Make sure the mount target exists in the new rootfs. Create if needed and
364 * possible.
365 */
366static int setup_mount_destination(const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700367 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800368 const char *dest)
369{
370 int rc;
371 struct stat st_buf;
372
373 rc = stat(dest, &st_buf);
374 if (rc == 0) /* destination exists */
375 return 0;
376
377 /* Try to create the destination. Either make directory or touch a file
378 * depending on the source type.
379 */
Dylan Reid2149be92016-04-28 18:38:57 -0700380 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800381 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
382 return make_dir(dest, mnt->uid, mnt->gid, mnt->mode);
383
384 return touch_file(dest, mnt->uid, mnt->gid, mnt->mode);
385}
386
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700387/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700388static int run_setfiles_command(const struct container *c,
389 const struct container_config *config,
390 const char *dest)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700391{
392 int rc;
393 int status;
394 int pid;
395 char *context_path;
396
Dylan Reide040c6b2016-05-02 18:49:02 -0700397 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700398 return 0;
399
Dylan Reidb3621832016-03-24 10:24:57 -0700400 /* Really gross hack to avoid setfiles on /data, this should be removed
401 * when data isn't under /home/chronos/user where we can't access it as
402 * the android user.
403 * TODO(b/28705740) - Fix permission to the data directory.
404 */
405 if (strlen(dest) >= 5 && !strcmp(&dest[strlen(dest) - 5], "/data"))
406 return 0;
407
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700408 if (asprintf(&context_path, "%s/file_contexts",
409 c->runfsroot) < 0)
410 return -errno;
411
412 pid = fork();
413 if (pid == 0) {
414 const char *argv[] = {
Dylan Reide040c6b2016-05-02 18:49:02 -0700415 config->run_setfiles,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700416 "-r",
417 c->runfsroot,
418 context_path,
419 dest,
420 NULL,
421 };
422 const char *env[] = {
423 NULL,
424 };
425
426 execve(argv[0], (char *const*)argv, (char *const*)env);
427
428 /* Command failed to exec if execve returns. */
429 _exit(-errno);
430 }
431 free(context_path);
432 if (pid < 0)
433 return -errno;
434 do {
435 rc = waitpid(pid, &status, 0);
436 } while (rc == -1 && errno == EINTR);
437 if (rc < 0)
438 return -errno;
439 return status;
440}
441
Dylan Reide040c6b2016-05-02 18:49:02 -0700442/*
443 * Unmounts anything we mounted in this mount namespace in the opposite order
444 * that they were mounted.
445 */
446static int unmount_external_mounts(struct container *c)
447{
448 int ret = 0;
449
450 while (c->num_ext_mounts) {
451 c->num_ext_mounts--;
452 if (umount(c->ext_mounts[c->num_ext_mounts]))
453 ret = -errno;
454 free(c->ext_mounts[c->num_ext_mounts]);
455 }
456 free(c->ext_mounts);
457 return ret;
458}
459
460static int do_container_mounts(struct container *c,
461 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -0700462{
463 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700464 int rc = 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700465 char *source;
466 char *dest;
Dylan Reid7daf9982016-04-28 16:55:42 -0700467
Dylan Reide040c6b2016-05-02 18:49:02 -0700468 /*
469 * Allocate space to track anything we mount in our mount namespace.
470 * This over-allocates as it has space for all mounts.
471 */
472 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
473 if (!c->ext_mounts)
474 return -errno;
475
476 for (i = 0; i < config->num_mounts; ++i) {
477 const struct container_mount *mnt = &config->mounts[i];
Dylan Reid7daf9982016-04-28 16:55:42 -0700478
Dylan Reid2149be92016-04-28 18:38:57 -0700479 source = NULL;
480 dest = NULL;
Dylan Reid7daf9982016-04-28 16:55:42 -0700481 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
482 return -errno;
483
Dylan Reid2149be92016-04-28 18:38:57 -0700484 /*
485 * If it's a bind mount relative to rootfs, append source to
486 * rootfs path, otherwise source path is absolute.
487 */
488 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
489 if (asprintf(&source, "%s/%s", c->runfsroot,
490 mnt->source) < 0)
491 goto error_free_return;
492 } else {
493 if (asprintf(&source, "%s", mnt->source) < 0)
494 goto error_free_return;
495 }
496
Dylan Reid7daf9982016-04-28 16:55:42 -0700497 if (mnt->create) {
Dylan Reid2149be92016-04-28 18:38:57 -0700498 if (setup_mount_destination(mnt, source, dest))
499 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700500 }
501 if (mnt->mount_in_ns) {
Dylan Reid2149be92016-04-28 18:38:57 -0700502 /* We can mount this with minijail. */
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700503 rc = minijail_mount(c->jail, source, mnt->destination,
504 mnt->type, mnt->flags);
505 if (rc)
Dylan Reid2149be92016-04-28 18:38:57 -0700506 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700507 } else {
Dylan Reidb3621832016-03-24 10:24:57 -0700508 /* Mount this externally and unmount it on exit. */
509 if (mount(source, dest, mnt->type, mnt->flags,
510 mnt->data))
Dylan Reid2149be92016-04-28 18:38:57 -0700511 goto error_free_return;
Dylan Reide040c6b2016-05-02 18:49:02 -0700512 /* Save this to unmount when shutting down. */
513 c->ext_mounts[c->num_ext_mounts] = strdup(dest);
514 c->num_ext_mounts++;
Dylan Reid7daf9982016-04-28 16:55:42 -0700515 }
Dylan Reid2149be92016-04-28 18:38:57 -0700516 free(source);
Dylan Reid7daf9982016-04-28 16:55:42 -0700517 free(dest);
518 }
519 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700520
521error_free_return:
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700522 if (!rc)
523 rc = -errno;
Dylan Reid2149be92016-04-28 18:38:57 -0700524 free(dest);
525 free(source);
Dylan Reide040c6b2016-05-02 18:49:02 -0700526 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700527 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -0700528}
529
Dylan Reide040c6b2016-05-02 18:49:02 -0700530int container_start(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -0800531{
Dylan Reidb3621832016-03-24 10:24:57 -0700532 static const mode_t root_dir_mode = 0660;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700533 int rc = 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800534 unsigned int i;
Dylan Reide040c6b2016-05-02 18:49:02 -0700535 const char *rootfs = config->rootfs;
Dylan Reid837c74a2016-01-22 17:25:21 -0800536 char *runfs_template;
537
Dylan Reide040c6b2016-05-02 18:49:02 -0700538 if (!config)
539 return -EINVAL;
540 if (!config->program_argv || !config->program_argv[0])
541 return -EINVAL;
542
Dylan Reid837c74a2016-01-22 17:25:21 -0800543 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
544 return -errno;
545
546 c->runfs = mkdtemp(runfs_template);
547 if (!c->runfs) {
548 free(runfs_template);
549 return -errno;
550 }
Dylan Reidb3621832016-03-24 10:24:57 -0700551 /* Make sure the container uid can access the rootfs. */
Luis Hector Chavez945af482016-06-03 08:39:34 -0700552 if (chmod(c->runfs, 0755))
Dylan Reidb3621832016-03-24 10:24:57 -0700553 goto error_rmdir;
554
Luis Hector Chavez945af482016-06-03 08:39:34 -0700555 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
Dylan Reid837c74a2016-01-22 17:25:21 -0800556 goto error_rmdir;
557
Luis Hector Chavez945af482016-06-03 08:39:34 -0700558 if (mkdir(c->runfsroot, root_dir_mode))
559 goto error_rmdir;
560 if (chmod(c->runfsroot, root_dir_mode))
561 goto error_rmdir;
562
563 if (mount(rootfs, c->runfsroot, "", MS_BIND | MS_RDONLY, NULL))
Dylan Reid837c74a2016-01-22 17:25:21 -0800564 goto error_rmdir;
565
566 c->jail = minijail_new();
Luis Hector Chavez945af482016-06-03 08:39:34 -0700567 if (!c->jail) {
568 rc = -ENOMEM;
569 goto error_rmdir;
570 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800571
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700572 rc = do_container_mounts(c, config);
573 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -0700574 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800575
576 c->cgroup->ops->deny_all_devices(c->cgroup);
577
Dylan Reide040c6b2016-05-02 18:49:02 -0700578 for (i = 0; i < config->num_devices; i++) {
579 const struct container_device *dev = &config->devices[i];
Dylan Reid837c74a2016-01-22 17:25:21 -0800580 int mode;
Dylan Reid355d5e42016-04-29 16:53:31 -0700581 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800582
583 switch (dev->type) {
584 case 'b':
585 mode = S_IFBLK;
586 break;
587 case 'c':
588 mode = S_IFCHR;
589 break;
590 default:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700591 rc = -EINVAL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800592 goto error_rmdir;
593 }
594 mode |= dev->fs_permissions;
595
Dylan Reid355d5e42016-04-29 16:53:31 -0700596 if (dev->copy_minor) {
597 struct stat st_buff;
598 if (stat(dev->path, &st_buff) < 0)
599 goto error_rmdir;
600 /* Use the minor macro to extract the device number. */
601 minor = minor(st_buff.st_rdev);
602 }
603 if (minor >= 0) {
604 char *path;
605
606 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
607 goto error_rmdir;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700608 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST) {
Dylan Reid837c74a2016-01-22 17:25:21 -0800609 free(path);
610 goto error_rmdir;
611 }
Luis Hector Chavez945af482016-06-03 08:39:34 -0700612 if (chown(path, dev->uid, dev->gid)) {
Dylan Reid837c74a2016-01-22 17:25:21 -0800613 free(path);
614 goto error_rmdir;
615 }
Luis Hector Chavez945af482016-06-03 08:39:34 -0700616 if (chmod(path, dev->fs_permissions)) {
617 free(path);
618 goto error_rmdir;
619 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800620 free(path);
Dylan Reid837c74a2016-01-22 17:25:21 -0800621 }
622
623 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
Dylan Reid355d5e42016-04-29 16:53:31 -0700624 minor, dev->read_allowed,
Dylan Reid837c74a2016-01-22 17:25:21 -0800625 dev->write_allowed,
626 dev->modify_allowed, dev->type);
627 if (rc)
628 goto error_rmdir;
629 }
630
Dylan Reidd7229582016-04-27 17:08:40 -0700631 /* Potentailly run setfiles on mounts configured outside of the jail */
Dylan Reide040c6b2016-05-02 18:49:02 -0700632 for (i = 0; i < config->num_mounts; i++) {
633 const struct container_mount *mnt = &config->mounts[i];
Dylan Reidd7229582016-04-27 17:08:40 -0700634 char *dest;
635
636 if (mnt->mount_in_ns)
637 continue;
638 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
639 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700640 rc = run_setfiles_command(c, config, dest);
Dylan Reidd7229582016-04-27 17:08:40 -0700641 free(dest);
642 if (rc)
643 goto error_rmdir;
644 }
645
Dylan Reid837c74a2016-01-22 17:25:21 -0800646 /* Setup and start the container with libminijail. */
647 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0)
648 goto error_rmdir;
649 minijail_write_pid_file(c->jail, c->pid_file_path);
650 minijail_reset_signal_mask(c->jail);
651
652 /* Setup container namespaces. */
653 minijail_namespace_ipc(c->jail);
654 minijail_namespace_vfs(c->jail);
655 minijail_namespace_net(c->jail);
656 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -0800657 minijail_namespace_user(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -0700658 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800659 if (rc)
660 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700661 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800662 if (rc)
663 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800664
665 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
666 if (rc)
667 goto error_rmdir;
668
669 /* Add the cgroups configured above. */
670 rc = minijail_add_to_cgroup(c->jail, cgroup_cpu_tasks_path(c->cgroup));
671 if (rc)
672 goto error_rmdir;
673 rc = minijail_add_to_cgroup(c->jail,
674 cgroup_cpuacct_tasks_path(c->cgroup));
675 if (rc)
676 goto error_rmdir;
677 rc = minijail_add_to_cgroup(c->jail,
678 cgroup_devices_tasks_path(c->cgroup));
679 if (rc)
680 goto error_rmdir;
681 rc = minijail_add_to_cgroup(c->jail,
682 cgroup_freezer_tasks_path(c->cgroup));
683 if (rc)
684 goto error_rmdir;
685
Dylan Reide040c6b2016-05-02 18:49:02 -0700686 if (config->alt_syscall_table)
687 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800688
689 minijail_run_as_init(c->jail);
690
Dylan Reid3da683b2016-04-05 03:35:35 -0700691 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
692 minijail_skip_remount_private(c->jail);
693
Dylan Reid837c74a2016-01-22 17:25:21 -0800694 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -0700695 config->program_argv[0],
696 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -0800697 &c->init_pid, NULL, NULL,
698 NULL);
699 if (rc)
700 goto error_rmdir;
701 return 0;
702
703error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700704 if (!rc)
705 rc = -errno;
706 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800707 return rc;
708}
709
710const char *container_root(struct container *c)
711{
712 return c->runfs;
713}
714
715int container_pid(struct container *c)
716{
717 return c->init_pid;
718}
719
720static int container_teardown(struct container *c)
721{
Dylan Reid837c74a2016-01-22 17:25:21 -0800722 int ret = 0;
723
Dylan Reide040c6b2016-05-02 18:49:02 -0700724 unmount_external_mounts(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700725 if (c->runfsroot) {
726 if (umount(c->runfsroot))
727 ret = -errno;
728 if (rmdir(c->runfsroot))
729 ret = -errno;
730 free(c->runfsroot);
731 c->runfsroot = NULL;
732 }
733 if (c->pid_file_path) {
734 if (unlink(c->pid_file_path))
735 ret = -errno;
736 free(c->pid_file_path);
737 c->pid_file_path = NULL;
738 }
739 if (c->runfs) {
740 if (rmdir(c->runfs))
741 ret = -errno;
742 free(c->runfs);
743 c->runfs = NULL;
744 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800745 return ret;
746}
747
748int container_wait(struct container *c)
749{
Dylan Reidcf745c52016-04-22 10:18:03 -0700750 int rc;
751
752 do {
753 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -0700754 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -0700755
Luis Hector Chavez945af482016-06-03 08:39:34 -0700756 // If the process had already been reaped, still perform teardown.
757 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -0700758 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700759 }
Dylan Reidcf745c52016-04-22 10:18:03 -0700760 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -0800761}
762
763int container_kill(struct container *c)
764{
Luis Hector Chavez945af482016-06-03 08:39:34 -0700765 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -0800766 return -errno;
767 return container_wait(c);
768}