blob: fa37f44087bc452c800e2ff24855711113776561 [file] [log] [blame]
/* Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#define _GNU_SOURCE /* For asprintf */
7
#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include "container_cgroup.h"
#include "libcontainer.h"
#include "libminijail.h"
24
/*
 * Frees |ptr| and then resets it to NULL, so that a later free() or NULL
 * check on the same pointer is harmless.
 *
 * |ptr| must be a modifiable pointer lvalue. It is evaluated twice, so it
 * must not have side effects (e.g. do not pass `p++`).
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
30
/* Forward declaration: container_start() calls this on its error path. */
static int container_teardown(struct container *c);
32
/*
 * Replaces the string owned by |*dest| with a heap-allocated copy of |src|.
 *
 * The previous value of |*dest| is released only after the copy succeeded, so
 * on failure |*dest| is left untouched.
 *
 * Returns 0 on success, -EINVAL if |dest| or |src| is NULL, or -ENOMEM if the
 * copy could not be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behavior; report it instead of crashing. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;
	free(*dest); /* free(NULL) is a no-op, so no guard is needed. */
	*dest = copy;
	return 0;
}
43
/*
 * One filesystem to mount for the container, as recorded by
 * container_config_add_mount(). All strings are owned by this structure.
 *
 * name - Name supplied by the caller for this mount.
 * source - What to mount. For a bind mount whose source does not start with
 *          '/', do_container_mount() resolves it relative to the container's
 *          runfsroot.
 * destination - Mount point, expressed as a path inside the container root.
 * type - Filesystem type handed to the mount call.
 * data - Optional data string handed to mount(2) for external mounts; may be
 *        NULL.
 * flags - Mount flags (MS_BIND is tested explicitly).
 * uid, gid, mode - Ownership and mode applied when the destination has to be
 *                  created.
 */
struct container_mount {
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;
	int flags;
	int uid;
	int gid;
	int mode;
	int mount_in_ns;  /* True if mount should happen in new vfs ns */
	int create;  /* True if target should be created if it doesn't exist */
};
57
/*
 * One device node to expose to the container, as recorded by
 * container_config_add_device(). |path| is owned by this structure.
 *
 * path - Node path; container_create_device() appends it to runfsroot.
 * fs_permissions - Permission bits for the created node.
 * major, minor - Device numbers. container_start() skips node creation when
 *                the effective minor is negative.
 * uid, gid - Owner of the created node.
 * read_allowed, write_allowed, modify_allowed - Passed through to the device
 *                cgroup's add_device operation.
 */
struct container_device {
	char type; /* 'c' or 'b' for char or block */
	char *path;
	int fs_permissions;
	int major;
	int minor;
	int copy_minor; /* Copy the minor from existing node, ignores |minor| */
	int uid;
	int gid;
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
71
/*
 * Structure that configures how the container is run.
 *
 * rootfs - Path to the root of the container's filesystem.
 * program_argv - The program to run and args, e.g. "/sbin/init". The array
 *                holds num_args strings followed by a NULL terminator.
 * num_args - Number of args in program_argv.
 * uid - The uid the container will run as.
 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
 * gid - The gid the container will run as.
 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
 * alt_syscall_table - Syscall table to use or NULL if none.
 * mounts - Filesystems to mount in the new namespace.
 * num_mounts - Number of above.
 * devices - Device nodes to create.
 * num_devices - Number of above.
 * run_setfiles - Path of the setfiles command to run on mounts made outside
 *                the jail (to enable selinux), or NULL to skip that step.
 */
struct container_config {
	char *rootfs;
	char **program_argv;
	size_t num_args;
	uid_t uid;
	char *uid_map;
	gid_t gid;
	char *gid_map;
	char *alt_syscall_table;
	struct container_mount *mounts;
	size_t num_mounts;
	struct container_device *devices;
	size_t num_devices;
	char *run_setfiles;
};
104
105struct container_config *container_config_create()
106{
107 return calloc(1, sizeof(struct container_config));
108}
109
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700110static void container_free_program_args(struct container_config *c)
111{
112 int i;
113
114 if (!c->program_argv)
115 return;
116 for (i = 0; i < c->num_args; ++i) {
117 FREE_AND_NULL(c->program_argv[i]);
118 }
119 FREE_AND_NULL(c->program_argv);
120}
121
122static void container_config_free_mount(struct container_mount *mount)
123{
124 FREE_AND_NULL(mount->name);
125 FREE_AND_NULL(mount->source);
126 FREE_AND_NULL(mount->destination);
127 FREE_AND_NULL(mount->type);
128 FREE_AND_NULL(mount->data);
129}
130
131static void container_config_free_device(struct container_device *device)
132{
133 FREE_AND_NULL(device->path);
134}
135
Dylan Reid837c74a2016-01-22 17:25:21 -0800136void container_config_destroy(struct container_config *c)
137{
138 size_t i;
139
140 if (c == NULL)
141 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700142 FREE_AND_NULL(c->rootfs);
143 container_free_program_args(c);
144 FREE_AND_NULL(c->uid_map);
145 FREE_AND_NULL(c->gid_map);
146 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800147 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700148 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800149 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700150 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800151 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700152 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800153 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700154 FREE_AND_NULL(c->devices);
155 FREE_AND_NULL(c->run_setfiles);
156 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800157}
158
159int container_config_rootfs(struct container_config *c, const char *rootfs)
160{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700161 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800162}
163
Dylan Reid11456722016-05-02 11:24:50 -0700164const char *container_config_get_rootfs(const struct container_config *c)
165{
166 return c->rootfs;
167}
168
Dylan Reid837c74a2016-01-22 17:25:21 -0800169int container_config_program_argv(struct container_config *c,
170 char **argv, size_t num_args)
171{
172 size_t i;
173
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700174 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800175 c->num_args = num_args;
176 c->program_argv = calloc(num_args + 1, sizeof(char *));
177 if (!c->program_argv)
178 return -ENOMEM;
179 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700180 if (strdup_and_free(&c->program_argv[i], argv[i]))
181 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800182 }
183 c->program_argv[num_args] = NULL;
184 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700185
186error_free_return:
187 container_free_program_args(c);
188 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800189}
190
Dylan Reid11456722016-05-02 11:24:50 -0700191size_t container_config_get_num_program_args(const struct container_config *c)
192{
193 return c->num_args;
194}
195
196const char *container_config_get_program_arg(const struct container_config *c,
197 size_t index)
198{
199 if (index >= c->num_args)
200 return NULL;
201 return c->program_argv[index];
202}
203
Dylan Reid1874feb2016-06-22 17:53:50 -0700204void container_config_uid(struct container_config *c, uid_t uid)
205{
206 c->uid = uid;
207}
208
209uid_t container_config_get_uid(const struct container_config *c)
210{
211 return c->uid;
212}
213
Dylan Reid837c74a2016-01-22 17:25:21 -0800214int container_config_uid_map(struct container_config *c, const char *uid_map)
215{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700216 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800217}
218
Dylan Reid1874feb2016-06-22 17:53:50 -0700219void container_config_gid(struct container_config *c, gid_t gid)
220{
221 c->gid = gid;
222}
223
224gid_t container_config_get_gid(const struct container_config *c)
225{
226 return c->gid;
227}
228
Dylan Reid837c74a2016-01-22 17:25:21 -0800229int container_config_gid_map(struct container_config *c, const char *gid_map)
230{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700231 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800232}
233
234int container_config_alt_syscall_table(struct container_config *c,
235 const char *alt_syscall_table)
236{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700237 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800238}
239
240int container_config_add_mount(struct container_config *c,
241 const char *name,
242 const char *source,
243 const char *destination,
244 const char *type,
245 const char *data,
246 int flags,
247 int uid,
248 int gid,
249 int mode,
250 int mount_in_ns,
251 int create)
252{
253 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700254 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800255
256 if (name == NULL || source == NULL ||
257 destination == NULL || type == NULL)
258 return -EINVAL;
259
260 mount_ptr = realloc(c->mounts,
261 sizeof(c->mounts[0]) * (c->num_mounts + 1));
262 if (!mount_ptr)
263 return -ENOMEM;
264 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700265 current_mount = &c->mounts[c->num_mounts];
266 memset(current_mount, 0, sizeof(struct container_mount));
267
268 if (strdup_and_free(&current_mount->name, name))
269 goto error_free_return;
270 if (strdup_and_free(&current_mount->source, source))
271 goto error_free_return;
272 if (strdup_and_free(&current_mount->destination, destination))
273 goto error_free_return;
274 if (strdup_and_free(&current_mount->type, type))
275 goto error_free_return;
276 if (data && strdup_and_free(&current_mount->data, data))
277 goto error_free_return;
278 current_mount->flags = flags;
279 current_mount->uid = uid;
280 current_mount->gid = gid;
281 current_mount->mode = mode;
282 current_mount->mount_in_ns = mount_in_ns;
283 current_mount->create = create;
Dylan Reid837c74a2016-01-22 17:25:21 -0800284 ++c->num_mounts;
285 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700286
287error_free_return:
288 container_config_free_mount(current_mount);
289 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800290}
291
292int container_config_add_device(struct container_config *c,
293 char type,
294 const char *path,
295 int fs_permissions,
296 int major,
297 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700298 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800299 int uid,
300 int gid,
301 int read_allowed,
302 int write_allowed,
303 int modify_allowed)
304{
305 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700306 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800307
308 if (path == NULL)
309 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700310 /* If using a dynamic minor number, ensure that minor is -1. */
311 if (copy_minor && (minor != -1))
312 return -EINVAL;
313
Dylan Reid837c74a2016-01-22 17:25:21 -0800314 dev_ptr = realloc(c->devices,
315 sizeof(c->devices[0]) * (c->num_devices + 1));
316 if (!dev_ptr)
317 return -ENOMEM;
318 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700319 current_dev = &c->devices[c->num_devices];
320 memset(current_dev, 0, sizeof(struct container_device));
321
322 current_dev->type = type;
323 if (strdup_and_free(&current_dev->path, path))
324 goto error_free_return;
325 current_dev->fs_permissions = fs_permissions;
326 current_dev->major = major;
327 current_dev->minor = minor;
328 current_dev->copy_minor = copy_minor;
329 current_dev->uid = uid;
330 current_dev->gid = gid;
331 current_dev->read_allowed = read_allowed;
332 current_dev->write_allowed = write_allowed;
333 current_dev->modify_allowed = modify_allowed;
Dylan Reid837c74a2016-01-22 17:25:21 -0800334 ++c->num_devices;
335 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700336
337error_free_return:
338 container_config_free_device(current_dev);
339 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800340}
341
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700342int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700343 const char *setfiles_cmd)
344{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700345 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700346}
Dylan Reid837c74a2016-01-22 17:25:21 -0800347
Dylan Reid11456722016-05-02 11:24:50 -0700348const char *container_config_get_run_setfiles(const struct container_config *c)
349{
350 return c->run_setfiles;
351}
352
/*
 * Container manipulation
 */

/*
 * Runtime state of one container.
 *
 * cgroup - Created in container_new(), released in container_destroy().
 * jail - Minijail created by container_start().
 * init_pid - Pid filled in by minijail when the program is launched.
 * runfs - Temporary run directory created with mkdtemp() under |rundir|.
 * rundir - Copy of the directory passed to container_new().
 * runfsroot - "<runfs>/root", where the rootfs is bind-mounted.
 * pid_file_path - "<runfs>/container.pid".
 * name - Copy of the name passed to container_new().
 */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *runfs;
	char *rundir;
	char *runfsroot;
	char *pid_file_path;
	char **ext_mounts; /* Mounts made outside of the minijail */
	size_t num_ext_mounts;
	char *name;
};
368
369struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700370 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800371{
372 struct container *c;
373
Dylan Reid837c74a2016-01-22 17:25:21 -0800374 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700375 if (!c)
376 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800377 c->cgroup = container_cgroup_new(name, "/sys/fs/cgroup");
378 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700379 c->name = strdup(name);
380 if (!c->cgroup || !c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700381 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800382 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700383 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800384 return c;
385}
386
387void container_destroy(struct container *c)
388{
Dylan Reid684975e2016-05-02 15:44:47 -0700389 if (c->cgroup)
390 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700391 if (c->jail)
392 minijail_destroy(c->jail);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700393 FREE_AND_NULL(c->name);
394 FREE_AND_NULL(c->rundir);
395 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800396}
397
/*
 * Creates the directory |path|, then applies |mode| and the |uid|/|gid|
 * ownership to it. Stops at the first step that fails.
 *
 * Returns 0 on success or the negated errno of the failing call.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
408
/*
 * Opens |path| read/write, creating it with |mode| if it does not exist, and
 * sets its ownership to |uid|/|gid|.
 *
 * Returns 0 on success or the negated errno of the failing open()/fchown().
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Capture the error now: close() below may overwrite errno. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);
	return rc;
}
422
423/* Make sure the mount target exists in the new rootfs. Create if needed and
424 * possible.
425 */
426static int setup_mount_destination(const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700427 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800428 const char *dest)
429{
430 int rc;
431 struct stat st_buf;
432
433 rc = stat(dest, &st_buf);
434 if (rc == 0) /* destination exists */
435 return 0;
436
437 /* Try to create the destination. Either make directory or touch a file
438 * depending on the source type.
439 */
Dylan Reid2149be92016-04-28 18:38:57 -0700440 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800441 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
442 return make_dir(dest, mnt->uid, mnt->gid, mnt->mode);
443
444 return touch_file(dest, mnt->uid, mnt->gid, mnt->mode);
445}
446
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700447/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700448static int run_setfiles_command(const struct container *c,
449 const struct container_config *config,
450 const char *dest)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700451{
452 int rc;
453 int status;
454 int pid;
455 char *context_path;
456
Dylan Reide040c6b2016-05-02 18:49:02 -0700457 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700458 return 0;
459
Dylan Reidb3621832016-03-24 10:24:57 -0700460 /* Really gross hack to avoid setfiles on /data, this should be removed
461 * when data isn't under /home/chronos/user where we can't access it as
462 * the android user.
463 * TODO(b/28705740) - Fix permission to the data directory.
464 */
465 if (strlen(dest) >= 5 && !strcmp(&dest[strlen(dest) - 5], "/data"))
466 return 0;
467
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700468 if (asprintf(&context_path, "%s/file_contexts",
469 c->runfsroot) < 0)
470 return -errno;
471
472 pid = fork();
473 if (pid == 0) {
474 const char *argv[] = {
Dylan Reide040c6b2016-05-02 18:49:02 -0700475 config->run_setfiles,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700476 "-r",
477 c->runfsroot,
478 context_path,
479 dest,
480 NULL,
481 };
482 const char *env[] = {
483 NULL,
484 };
485
486 execve(argv[0], (char *const*)argv, (char *const*)env);
487
488 /* Command failed to exec if execve returns. */
489 _exit(-errno);
490 }
491 free(context_path);
492 if (pid < 0)
493 return -errno;
494 do {
495 rc = waitpid(pid, &status, 0);
496 } while (rc == -1 && errno == EINTR);
497 if (rc < 0)
498 return -errno;
499 return status;
500}
501
Dylan Reide040c6b2016-05-02 18:49:02 -0700502/*
503 * Unmounts anything we mounted in this mount namespace in the opposite order
504 * that they were mounted.
505 */
506static int unmount_external_mounts(struct container *c)
507{
508 int ret = 0;
509
510 while (c->num_ext_mounts) {
511 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700512 if (!c->ext_mounts[c->num_ext_mounts])
513 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -0700514 if (umount(c->ext_mounts[c->num_ext_mounts]))
515 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700516 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -0700517 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700518 FREE_AND_NULL(c->ext_mounts);
Dylan Reide040c6b2016-05-02 18:49:02 -0700519 return ret;
520}
521
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700522static int do_container_mount(struct container *c,
523 const struct container_mount *mnt)
524{
525 char *source = NULL;
526 char *dest = NULL;
527 int rc = 0;
528
529 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
530 return -errno;
531
532 /*
533 * If it's a bind mount relative to rootfs, append source to
534 * rootfs path, otherwise source path is absolute.
535 */
536 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
537 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
538 goto error_free_return;
539 } else {
540 if (asprintf(&source, "%s", mnt->source) < 0)
541 goto error_free_return;
542 }
543
544 if (mnt->create) {
545 rc = setup_mount_destination(mnt, source, dest);
546 if (rc)
547 goto error_free_return;
548 }
549 if (mnt->mount_in_ns) {
550 /* We can mount this with minijail. */
551 rc = minijail_mount(c->jail, source, mnt->destination,
552 mnt->type, mnt->flags);
553 if (rc)
554 goto error_free_return;
555 } else {
556 /* Mount this externally and unmount it on exit. */
557 if (mount(source, dest, mnt->type, mnt->flags,
558 mnt->data))
559 goto error_free_return;
560 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700561 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
562 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700563 goto error_free_return;
564 c->num_ext_mounts++;
565 }
566
567 goto exit;
568
569error_free_return:
570 if (!rc)
571 rc = -errno;
572exit:
573 free(source);
574 free(dest);
575 return rc;
576}
577
Dylan Reide040c6b2016-05-02 18:49:02 -0700578static int do_container_mounts(struct container *c,
579 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -0700580{
581 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700582 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -0700583
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700584 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -0700585 /*
586 * Allocate space to track anything we mount in our mount namespace.
587 * This over-allocates as it has space for all mounts.
588 */
589 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
590 if (!c->ext_mounts)
591 return -errno;
592
593 for (i = 0; i < config->num_mounts; ++i) {
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700594 rc = do_container_mount(c, &config->mounts[i]);
595 if (rc)
596 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700597 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700598
Dylan Reid7daf9982016-04-28 16:55:42 -0700599 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700600
601error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -0700602 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700603 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -0700604}
605
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700606static int container_create_device(const struct container *c,
607 const struct container_device *dev,
608 int minor)
609{
610 char *path = NULL;
611 int rc = 0;
612 int mode;
613
614 switch (dev->type) {
615 case 'b':
616 mode = S_IFBLK;
617 break;
618 case 'c':
619 mode = S_IFCHR;
620 break;
621 default:
622 return -EINVAL;
623 }
624 mode |= dev->fs_permissions;
625
626 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
627 goto error_free_return;
628 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
629 goto error_free_return;
630 if (chown(path, dev->uid, dev->gid))
631 goto error_free_return;
632 if (chmod(path, dev->fs_permissions))
633 goto error_free_return;
634
635 goto exit;
636
637error_free_return:
638 rc = -errno;
639exit:
640 free(path);
641 return rc;
642}
643
Dylan Reide040c6b2016-05-02 18:49:02 -0700644int container_start(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -0800645{
Dylan Reidb3621832016-03-24 10:24:57 -0700646 static const mode_t root_dir_mode = 0660;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700647 int rc = 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800648 unsigned int i;
Dylan Reide040c6b2016-05-02 18:49:02 -0700649 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700650 char *runfs_template = NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800651
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700652 if (!c)
653 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -0700654 if (!config)
655 return -EINVAL;
656 if (!config->program_argv || !config->program_argv[0])
657 return -EINVAL;
658
Dylan Reid837c74a2016-01-22 17:25:21 -0800659 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
660 return -errno;
661
662 c->runfs = mkdtemp(runfs_template);
663 if (!c->runfs) {
664 free(runfs_template);
665 return -errno;
666 }
Dylan Reidb3621832016-03-24 10:24:57 -0700667 /* Make sure the container uid can access the rootfs. */
Dylan Reid4c6af2e2016-06-22 18:04:24 -0700668 if (chmod(c->runfs, 0700))
Dylan Reidb3621832016-03-24 10:24:57 -0700669 goto error_rmdir;
Dylan Reid1874feb2016-06-22 17:53:50 -0700670 if (chown(c->runfs, config->uid, config->gid))
671 goto error_rmdir;
Dylan Reidb3621832016-03-24 10:24:57 -0700672
Luis Hector Chavez945af482016-06-03 08:39:34 -0700673 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
Dylan Reid837c74a2016-01-22 17:25:21 -0800674 goto error_rmdir;
675
Luis Hector Chavez945af482016-06-03 08:39:34 -0700676 if (mkdir(c->runfsroot, root_dir_mode))
677 goto error_rmdir;
678 if (chmod(c->runfsroot, root_dir_mode))
679 goto error_rmdir;
680
681 if (mount(rootfs, c->runfsroot, "", MS_BIND | MS_RDONLY, NULL))
Dylan Reid837c74a2016-01-22 17:25:21 -0800682 goto error_rmdir;
683
684 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700685 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -0700686 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800687
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700688 rc = do_container_mounts(c, config);
689 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -0700690 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800691
692 c->cgroup->ops->deny_all_devices(c->cgroup);
693
Dylan Reide040c6b2016-05-02 18:49:02 -0700694 for (i = 0; i < config->num_devices; i++) {
695 const struct container_device *dev = &config->devices[i];
Dylan Reid355d5e42016-04-29 16:53:31 -0700696 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800697
Dylan Reid355d5e42016-04-29 16:53:31 -0700698 if (dev->copy_minor) {
699 struct stat st_buff;
700 if (stat(dev->path, &st_buff) < 0)
701 goto error_rmdir;
702 /* Use the minor macro to extract the device number. */
703 minor = minor(st_buff.st_rdev);
704 }
705 if (minor >= 0) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700706 rc = container_create_device(c, dev, minor);
707 if (rc)
Dylan Reid355d5e42016-04-29 16:53:31 -0700708 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800709 }
710
711 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
Dylan Reid355d5e42016-04-29 16:53:31 -0700712 minor, dev->read_allowed,
Dylan Reid837c74a2016-01-22 17:25:21 -0800713 dev->write_allowed,
714 dev->modify_allowed, dev->type);
715 if (rc)
716 goto error_rmdir;
717 }
718
Dylan Reidd7229582016-04-27 17:08:40 -0700719 /* Potentailly run setfiles on mounts configured outside of the jail */
Dylan Reide040c6b2016-05-02 18:49:02 -0700720 for (i = 0; i < config->num_mounts; i++) {
721 const struct container_mount *mnt = &config->mounts[i];
Dylan Reidd7229582016-04-27 17:08:40 -0700722 char *dest;
723
724 if (mnt->mount_in_ns)
725 continue;
726 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
727 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700728 rc = run_setfiles_command(c, config, dest);
Dylan Reidd7229582016-04-27 17:08:40 -0700729 free(dest);
730 if (rc)
731 goto error_rmdir;
732 }
733
Dylan Reid837c74a2016-01-22 17:25:21 -0800734 /* Setup and start the container with libminijail. */
735 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0)
736 goto error_rmdir;
737 minijail_write_pid_file(c->jail, c->pid_file_path);
738 minijail_reset_signal_mask(c->jail);
739
740 /* Setup container namespaces. */
741 minijail_namespace_ipc(c->jail);
742 minijail_namespace_vfs(c->jail);
743 minijail_namespace_net(c->jail);
744 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -0800745 minijail_namespace_user(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -0700746 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800747 if (rc)
748 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700749 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800750 if (rc)
751 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800752
753 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
754 if (rc)
755 goto error_rmdir;
756
757 /* Add the cgroups configured above. */
758 rc = minijail_add_to_cgroup(c->jail, cgroup_cpu_tasks_path(c->cgroup));
759 if (rc)
760 goto error_rmdir;
761 rc = minijail_add_to_cgroup(c->jail,
762 cgroup_cpuacct_tasks_path(c->cgroup));
763 if (rc)
764 goto error_rmdir;
765 rc = minijail_add_to_cgroup(c->jail,
766 cgroup_devices_tasks_path(c->cgroup));
767 if (rc)
768 goto error_rmdir;
769 rc = minijail_add_to_cgroup(c->jail,
770 cgroup_freezer_tasks_path(c->cgroup));
771 if (rc)
772 goto error_rmdir;
773
Dylan Reide040c6b2016-05-02 18:49:02 -0700774 if (config->alt_syscall_table)
775 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800776
777 minijail_run_as_init(c->jail);
778
Dylan Reid3da683b2016-04-05 03:35:35 -0700779 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
780 minijail_skip_remount_private(c->jail);
781
Dylan Reid837c74a2016-01-22 17:25:21 -0800782 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -0700783 config->program_argv[0],
784 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -0800785 &c->init_pid, NULL, NULL,
786 NULL);
787 if (rc)
788 goto error_rmdir;
789 return 0;
790
791error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700792 if (!rc)
793 rc = -errno;
794 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800795 return rc;
796}
797
798const char *container_root(struct container *c)
799{
800 return c->runfs;
801}
802
803int container_pid(struct container *c)
804{
805 return c->init_pid;
806}
807
808static int container_teardown(struct container *c)
809{
Dylan Reid837c74a2016-01-22 17:25:21 -0800810 int ret = 0;
811
Dylan Reide040c6b2016-05-02 18:49:02 -0700812 unmount_external_mounts(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700813 if (c->runfsroot) {
814 if (umount(c->runfsroot))
815 ret = -errno;
816 if (rmdir(c->runfsroot))
817 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700818 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700819 }
820 if (c->pid_file_path) {
821 if (unlink(c->pid_file_path))
822 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700823 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700824 }
825 if (c->runfs) {
826 if (rmdir(c->runfs))
827 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700828 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700829 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800830 return ret;
831}
832
833int container_wait(struct container *c)
834{
Dylan Reidcf745c52016-04-22 10:18:03 -0700835 int rc;
836
837 do {
838 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -0700839 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -0700840
Luis Hector Chavez945af482016-06-03 08:39:34 -0700841 // If the process had already been reaped, still perform teardown.
842 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -0700843 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700844 }
Dylan Reidcf745c52016-04-22 10:18:03 -0700845 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -0800846}
847
848int container_kill(struct container *c)
849{
Luis Hector Chavez945af482016-06-03 08:39:34 -0700850 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -0800851 return -errno;
852 return container_wait(c);
853}