/* Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
10#include <malloc.h>
11#include <signal.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <sys/mount.h>
16#include <sys/stat.h>
17#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070018#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080019#include <unistd.h>
20
21#include "container_cgroup.h"
22#include "libcontainer.h"
23#include "libminijail.h"
24
/*
 * Frees |ptr| and then resets it to NULL so that a stale pointer can never be
 * freed or dereferenced a second time.
 *
 * |ptr| must be a modifiable lvalue. It is evaluated twice, so it must not
 * have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
30
/*
 * Forward declaration: container_start() calls this on its error path, ahead
 * of the definition further down in this file.
 */
static int container_teardown(struct container *c);
32
/*
 * Replaces the string owned by |*dest| with a heap-allocated copy of |src|.
 *
 * The copy is made before the previous value is released, so |*dest| is left
 * untouched when the function fails.
 *
 * Returns 0 on success, -EINVAL if |dest| or |src| is NULL, or -ENOMEM if the
 * copy could not be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behaviour; reject it instead. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;
	free(*dest); /* free(NULL) is a no-op, no guard needed. */
	*dest = copy;
	return 0;
}
43
/*
 * One filesystem to mount for the container.
 *
 * name        - Name given to the mount when it was added to the config.
 * source      - What to mount.
 * destination - Mount point; it is appended to the container's root path.
 * type        - Filesystem type handed to the mount call.
 * data        - Optional mount data, NULL if none was supplied.
 * flags       - Mount flags (MS_BIND is inspected when resolving |source|).
 * uid         - Owner used if the destination has to be created.
 * gid         - Group used if the destination has to be created.
 * mode        - Mode used if the destination has to be created.
 * mount_in_ns - True if the mount should happen in the new vfs namespace.
 * create      - True if the target should be created when it doesn't exist.
 */
struct container_mount {
	char *name;
	char *source;
	char *destination;
	char *type;
	char *data;
	int flags;
	int uid;
	int gid;
	int mode;
	int mount_in_ns;
	int create;
};
57
/*
 * One device node to create for the container.
 *
 * type           - 'c' or 'b' for a char or block device.
 * path           - Path of the node; it is appended to the container's root.
 * fs_permissions - Permission bits applied to the created node.
 * major          - Device major number.
 * minor          - Device minor number.
 * copy_minor     - Copy the minor from the existing node, ignores |minor|.
 * uid            - Owner of the created node.
 * gid            - Group of the created node.
 * read_allowed   - Passed to the device cgroup when the device is added.
 * write_allowed  - Passed to the device cgroup when the device is added.
 * modify_allowed - Passed to the device cgroup when the device is added.
 */
struct container_device {
	char type;
	char *path;
	int fs_permissions;
	int major;
	int minor;
	int copy_minor;
	int uid;
	int gid;
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
71
/*
 * CPU cgroup parameters for a container. A value of zero means the
 * corresponding setting has not been configured.
 *
 * shares     - CPU shares.
 * quota      - CFS quota.
 * period     - CFS period.
 * rt_runtime - Realtime runtime.
 * rt_period  - Realtime period.
 */
struct container_cpu_cgroup {
	int shares;
	int quota;
	int period;
	int rt_runtime;
	int rt_period;
};
79
Dylan Reid837c74a2016-01-22 17:25:21 -080080/*
81 * Structure that configures how the container is run.
82 *
83 * rootfs - Path to the root of the container's filesystem.
84 * program_argv - The program to run and args, e.g. "/sbin/init".
85 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -070086 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -080087 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -070088 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -080089 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
90 * alt_syscall_table - Syscall table to use or NULL if none.
91 * mounts - Filesystems to mount in the new namespace.
92 * num_mounts - Number of above.
93 * devices - Device nodes to create.
94 * num_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -070095 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +080096 * cpu_cgparams - CPU cgroup params.
Dylan Reid837c74a2016-01-22 17:25:21 -080097 */
98struct container_config {
99 char *rootfs;
100 char **program_argv;
101 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700102 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800103 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700104 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800105 char *gid_map;
106 char *alt_syscall_table;
107 struct container_mount *mounts;
108 size_t num_mounts;
109 struct container_device *devices;
110 size_t num_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700111 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800112 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid837c74a2016-01-22 17:25:21 -0800113};
114
115struct container_config *container_config_create()
116{
117 return calloc(1, sizeof(struct container_config));
118}
119
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700120static void container_free_program_args(struct container_config *c)
121{
122 int i;
123
124 if (!c->program_argv)
125 return;
126 for (i = 0; i < c->num_args; ++i) {
127 FREE_AND_NULL(c->program_argv[i]);
128 }
129 FREE_AND_NULL(c->program_argv);
130}
131
132static void container_config_free_mount(struct container_mount *mount)
133{
134 FREE_AND_NULL(mount->name);
135 FREE_AND_NULL(mount->source);
136 FREE_AND_NULL(mount->destination);
137 FREE_AND_NULL(mount->type);
138 FREE_AND_NULL(mount->data);
139}
140
141static void container_config_free_device(struct container_device *device)
142{
143 FREE_AND_NULL(device->path);
144}
145
Dylan Reid837c74a2016-01-22 17:25:21 -0800146void container_config_destroy(struct container_config *c)
147{
148 size_t i;
149
150 if (c == NULL)
151 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700152 FREE_AND_NULL(c->rootfs);
153 container_free_program_args(c);
154 FREE_AND_NULL(c->uid_map);
155 FREE_AND_NULL(c->gid_map);
156 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800157 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700158 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800159 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700160 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800161 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700162 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800163 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700164 FREE_AND_NULL(c->devices);
165 FREE_AND_NULL(c->run_setfiles);
166 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800167}
168
169int container_config_rootfs(struct container_config *c, const char *rootfs)
170{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700171 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800172}
173
Dylan Reid11456722016-05-02 11:24:50 -0700174const char *container_config_get_rootfs(const struct container_config *c)
175{
176 return c->rootfs;
177}
178
Dylan Reid837c74a2016-01-22 17:25:21 -0800179int container_config_program_argv(struct container_config *c,
180 char **argv, size_t num_args)
181{
182 size_t i;
183
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700184 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800185 c->num_args = num_args;
186 c->program_argv = calloc(num_args + 1, sizeof(char *));
187 if (!c->program_argv)
188 return -ENOMEM;
189 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700190 if (strdup_and_free(&c->program_argv[i], argv[i]))
191 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800192 }
193 c->program_argv[num_args] = NULL;
194 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700195
196error_free_return:
197 container_free_program_args(c);
198 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800199}
200
Dylan Reid11456722016-05-02 11:24:50 -0700201size_t container_config_get_num_program_args(const struct container_config *c)
202{
203 return c->num_args;
204}
205
206const char *container_config_get_program_arg(const struct container_config *c,
207 size_t index)
208{
209 if (index >= c->num_args)
210 return NULL;
211 return c->program_argv[index];
212}
213
Dylan Reid1874feb2016-06-22 17:53:50 -0700214void container_config_uid(struct container_config *c, uid_t uid)
215{
216 c->uid = uid;
217}
218
219uid_t container_config_get_uid(const struct container_config *c)
220{
221 return c->uid;
222}
223
Dylan Reid837c74a2016-01-22 17:25:21 -0800224int container_config_uid_map(struct container_config *c, const char *uid_map)
225{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700226 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800227}
228
Dylan Reid1874feb2016-06-22 17:53:50 -0700229void container_config_gid(struct container_config *c, gid_t gid)
230{
231 c->gid = gid;
232}
233
234gid_t container_config_get_gid(const struct container_config *c)
235{
236 return c->gid;
237}
238
Dylan Reid837c74a2016-01-22 17:25:21 -0800239int container_config_gid_map(struct container_config *c, const char *gid_map)
240{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700241 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800242}
243
244int container_config_alt_syscall_table(struct container_config *c,
245 const char *alt_syscall_table)
246{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700247 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800248}
249
250int container_config_add_mount(struct container_config *c,
251 const char *name,
252 const char *source,
253 const char *destination,
254 const char *type,
255 const char *data,
256 int flags,
257 int uid,
258 int gid,
259 int mode,
260 int mount_in_ns,
261 int create)
262{
263 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700264 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800265
266 if (name == NULL || source == NULL ||
267 destination == NULL || type == NULL)
268 return -EINVAL;
269
270 mount_ptr = realloc(c->mounts,
271 sizeof(c->mounts[0]) * (c->num_mounts + 1));
272 if (!mount_ptr)
273 return -ENOMEM;
274 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700275 current_mount = &c->mounts[c->num_mounts];
276 memset(current_mount, 0, sizeof(struct container_mount));
277
278 if (strdup_and_free(&current_mount->name, name))
279 goto error_free_return;
280 if (strdup_and_free(&current_mount->source, source))
281 goto error_free_return;
282 if (strdup_and_free(&current_mount->destination, destination))
283 goto error_free_return;
284 if (strdup_and_free(&current_mount->type, type))
285 goto error_free_return;
286 if (data && strdup_and_free(&current_mount->data, data))
287 goto error_free_return;
288 current_mount->flags = flags;
289 current_mount->uid = uid;
290 current_mount->gid = gid;
291 current_mount->mode = mode;
292 current_mount->mount_in_ns = mount_in_ns;
293 current_mount->create = create;
Dylan Reid837c74a2016-01-22 17:25:21 -0800294 ++c->num_mounts;
295 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700296
297error_free_return:
298 container_config_free_mount(current_mount);
299 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800300}
301
302int container_config_add_device(struct container_config *c,
303 char type,
304 const char *path,
305 int fs_permissions,
306 int major,
307 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700308 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800309 int uid,
310 int gid,
311 int read_allowed,
312 int write_allowed,
313 int modify_allowed)
314{
315 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700316 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800317
318 if (path == NULL)
319 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700320 /* If using a dynamic minor number, ensure that minor is -1. */
321 if (copy_minor && (minor != -1))
322 return -EINVAL;
323
Dylan Reid837c74a2016-01-22 17:25:21 -0800324 dev_ptr = realloc(c->devices,
325 sizeof(c->devices[0]) * (c->num_devices + 1));
326 if (!dev_ptr)
327 return -ENOMEM;
328 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700329 current_dev = &c->devices[c->num_devices];
330 memset(current_dev, 0, sizeof(struct container_device));
331
332 current_dev->type = type;
333 if (strdup_and_free(&current_dev->path, path))
334 goto error_free_return;
335 current_dev->fs_permissions = fs_permissions;
336 current_dev->major = major;
337 current_dev->minor = minor;
338 current_dev->copy_minor = copy_minor;
339 current_dev->uid = uid;
340 current_dev->gid = gid;
341 current_dev->read_allowed = read_allowed;
342 current_dev->write_allowed = write_allowed;
343 current_dev->modify_allowed = modify_allowed;
Dylan Reid837c74a2016-01-22 17:25:21 -0800344 ++c->num_devices;
345 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700346
347error_free_return:
348 container_config_free_device(current_dev);
349 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800350}
351
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700352int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700353 const char *setfiles_cmd)
354{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700355 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700356}
Dylan Reid837c74a2016-01-22 17:25:21 -0800357
Dylan Reid11456722016-05-02 11:24:50 -0700358const char *container_config_get_run_setfiles(const struct container_config *c)
359{
360 return c->run_setfiles;
361}
362
Chinyue Chenfac909e2016-06-24 14:17:42 +0800363int container_config_set_cpu_shares(struct container_config *c, int shares)
364{
365 /* CPU shares must be 2 or higher. */
366 if (shares < 2)
367 return -EINVAL;
368
369 c->cpu_cgparams.shares = shares;
370 return 0;
371}
372
373int container_config_set_cpu_cfs_params(struct container_config *c,
374 int quota,
375 int period)
376{
377 /*
378 * quota could be set higher than period to utilize more than one CPU.
379 * quota could also be set as -1 to indicate the cgroup does not adhere
380 * to any CPU time restrictions.
381 */
382 if (quota <= 0 && quota != -1)
383 return -EINVAL;
384 if (period <= 0)
385 return -EINVAL;
386
387 c->cpu_cgparams.quota = quota;
388 c->cpu_cgparams.period = period;
389 return 0;
390}
391
392int container_config_set_cpu_rt_params(struct container_config *c,
393 int rt_runtime,
394 int rt_period)
395{
396 /*
397 * rt_runtime could be set as 0 to prevent the cgroup from using
398 * realtime CPU.
399 */
400 if (rt_runtime < 0 || rt_runtime >= rt_period)
401 return -EINVAL;
402
403 c->cpu_cgparams.rt_runtime = rt_runtime;
404 c->cpu_cgparams.rt_period = rt_period;
405 return 0;
406}
407
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800408int container_config_get_cpu_shares(struct container_config *c)
409{
410 return c->cpu_cgparams.shares;
411}
412
413int container_config_get_cpu_quota(struct container_config *c)
414{
415 return c->cpu_cgparams.quota;
416}
417
418int container_config_get_cpu_period(struct container_config *c)
419{
420 return c->cpu_cgparams.period;
421}
422
423int container_config_get_cpu_rt_runtime(struct container_config *c)
424{
425 return c->cpu_cgparams.rt_runtime;
426}
427
428int container_config_get_cpu_rt_period(struct container_config *c)
429{
430 return c->cpu_cgparams.rt_period;
431}
432
/*
 * Container manipulation
 *
 * State of one container instance.
 *
 * cgroup         - Cgroup handle created for the container's name.
 * jail           - Minijail used to launch the container.
 * init_pid       - Pid of the container's init process once started.
 * runfs          - Temporary run directory created under |rundir|.
 * rundir         - Directory in which |runfs| is created.
 * runfsroot      - "root" directory inside |runfs| where rootfs is mounted.
 * pid_file_path  - Path of the pid file written inside |runfs|.
 * ext_mounts     - Mounts made outside of the minijail.
 * num_ext_mounts - Number of entries in use in |ext_mounts|.
 * name           - Name of the container.
 */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *runfs;
	char *rundir;
	char *runfsroot;
	char *pid_file_path;
	char **ext_mounts;
	size_t num_ext_mounts;
	char *name;
};
448
449struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700450 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800451{
452 struct container *c;
453
Dylan Reid837c74a2016-01-22 17:25:21 -0800454 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700455 if (!c)
456 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800457 c->cgroup = container_cgroup_new(name, "/sys/fs/cgroup");
458 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700459 c->name = strdup(name);
460 if (!c->cgroup || !c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700461 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800462 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700463 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800464 return c;
465}
466
467void container_destroy(struct container *c)
468{
Dylan Reid684975e2016-05-02 15:44:47 -0700469 if (c->cgroup)
470 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700471 if (c->jail)
472 minijail_destroy(c->jail);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700473 FREE_AND_NULL(c->name);
474 FREE_AND_NULL(c->rundir);
475 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800476}
477
/*
 * Creates the directory |path| and then applies |mode| and the |uid|/|gid|
 * ownership to it.
 *
 * Returns 0 on success or -errno from the first step that failed; later steps
 * are not attempted.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;

	return 0;
}
488
/*
 * Makes sure the file |path| exists, creating it with |mode| if necessary,
 * and sets its ownership to |uid|/|gid|.
 *
 * Returns 0 on success or -errno if the file could not be opened or its
 * ownership could not be changed.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Record the error now; close() below may overwrite errno. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);
	return rc;
}
502
503/* Make sure the mount target exists in the new rootfs. Create if needed and
504 * possible.
505 */
506static int setup_mount_destination(const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700507 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800508 const char *dest)
509{
510 int rc;
511 struct stat st_buf;
512
513 rc = stat(dest, &st_buf);
514 if (rc == 0) /* destination exists */
515 return 0;
516
517 /* Try to create the destination. Either make directory or touch a file
518 * depending on the source type.
519 */
Dylan Reid2149be92016-04-28 18:38:57 -0700520 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800521 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
522 return make_dir(dest, mnt->uid, mnt->gid, mnt->mode);
523
524 return touch_file(dest, mnt->uid, mnt->gid, mnt->mode);
525}
526
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700527/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700528static int run_setfiles_command(const struct container *c,
529 const struct container_config *config,
530 const char *dest)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700531{
532 int rc;
533 int status;
534 int pid;
535 char *context_path;
536
Dylan Reide040c6b2016-05-02 18:49:02 -0700537 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700538 return 0;
539
Dylan Reidb3621832016-03-24 10:24:57 -0700540 /* Really gross hack to avoid setfiles on /data, this should be removed
541 * when data isn't under /home/chronos/user where we can't access it as
542 * the android user.
543 * TODO(b/28705740) - Fix permission to the data directory.
544 */
545 if (strlen(dest) >= 5 && !strcmp(&dest[strlen(dest) - 5], "/data"))
546 return 0;
547
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700548 if (asprintf(&context_path, "%s/file_contexts",
549 c->runfsroot) < 0)
550 return -errno;
551
552 pid = fork();
553 if (pid == 0) {
554 const char *argv[] = {
Dylan Reide040c6b2016-05-02 18:49:02 -0700555 config->run_setfiles,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700556 "-r",
557 c->runfsroot,
558 context_path,
559 dest,
560 NULL,
561 };
562 const char *env[] = {
563 NULL,
564 };
565
566 execve(argv[0], (char *const*)argv, (char *const*)env);
567
568 /* Command failed to exec if execve returns. */
569 _exit(-errno);
570 }
571 free(context_path);
572 if (pid < 0)
573 return -errno;
574 do {
575 rc = waitpid(pid, &status, 0);
576 } while (rc == -1 && errno == EINTR);
577 if (rc < 0)
578 return -errno;
579 return status;
580}
581
Dylan Reide040c6b2016-05-02 18:49:02 -0700582/*
583 * Unmounts anything we mounted in this mount namespace in the opposite order
584 * that they were mounted.
585 */
586static int unmount_external_mounts(struct container *c)
587{
588 int ret = 0;
589
590 while (c->num_ext_mounts) {
591 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700592 if (!c->ext_mounts[c->num_ext_mounts])
593 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -0700594 if (umount(c->ext_mounts[c->num_ext_mounts]))
595 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700596 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -0700597 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700598 FREE_AND_NULL(c->ext_mounts);
Dylan Reide040c6b2016-05-02 18:49:02 -0700599 return ret;
600}
601
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700602static int do_container_mount(struct container *c,
603 const struct container_mount *mnt)
604{
605 char *source = NULL;
606 char *dest = NULL;
607 int rc = 0;
608
609 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
610 return -errno;
611
612 /*
613 * If it's a bind mount relative to rootfs, append source to
614 * rootfs path, otherwise source path is absolute.
615 */
616 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
617 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
618 goto error_free_return;
619 } else {
620 if (asprintf(&source, "%s", mnt->source) < 0)
621 goto error_free_return;
622 }
623
624 if (mnt->create) {
625 rc = setup_mount_destination(mnt, source, dest);
626 if (rc)
627 goto error_free_return;
628 }
629 if (mnt->mount_in_ns) {
630 /* We can mount this with minijail. */
631 rc = minijail_mount(c->jail, source, mnt->destination,
632 mnt->type, mnt->flags);
633 if (rc)
634 goto error_free_return;
635 } else {
636 /* Mount this externally and unmount it on exit. */
637 if (mount(source, dest, mnt->type, mnt->flags,
638 mnt->data))
639 goto error_free_return;
640 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700641 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
642 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700643 goto error_free_return;
644 c->num_ext_mounts++;
645 }
646
647 goto exit;
648
649error_free_return:
650 if (!rc)
651 rc = -errno;
652exit:
653 free(source);
654 free(dest);
655 return rc;
656}
657
Dylan Reide040c6b2016-05-02 18:49:02 -0700658static int do_container_mounts(struct container *c,
659 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -0700660{
661 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700662 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -0700663
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700664 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -0700665 /*
666 * Allocate space to track anything we mount in our mount namespace.
667 * This over-allocates as it has space for all mounts.
668 */
669 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
670 if (!c->ext_mounts)
671 return -errno;
672
673 for (i = 0; i < config->num_mounts; ++i) {
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700674 rc = do_container_mount(c, &config->mounts[i]);
675 if (rc)
676 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700677 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700678
Dylan Reid7daf9982016-04-28 16:55:42 -0700679 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700680
681error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -0700682 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700683 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -0700684}
685
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700686static int container_create_device(const struct container *c,
687 const struct container_device *dev,
688 int minor)
689{
690 char *path = NULL;
691 int rc = 0;
692 int mode;
693
694 switch (dev->type) {
695 case 'b':
696 mode = S_IFBLK;
697 break;
698 case 'c':
699 mode = S_IFCHR;
700 break;
701 default:
702 return -EINVAL;
703 }
704 mode |= dev->fs_permissions;
705
706 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
707 goto error_free_return;
708 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
709 goto error_free_return;
710 if (chown(path, dev->uid, dev->gid))
711 goto error_free_return;
712 if (chmod(path, dev->fs_permissions))
713 goto error_free_return;
714
715 goto exit;
716
717error_free_return:
718 rc = -errno;
719exit:
720 free(path);
721 return rc;
722}
723
Dylan Reide040c6b2016-05-02 18:49:02 -0700724int container_start(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -0800725{
Dylan Reidb3621832016-03-24 10:24:57 -0700726 static const mode_t root_dir_mode = 0660;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700727 int rc = 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800728 unsigned int i;
Dylan Reide040c6b2016-05-02 18:49:02 -0700729 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700730 char *runfs_template = NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800731
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700732 if (!c)
733 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -0700734 if (!config)
735 return -EINVAL;
736 if (!config->program_argv || !config->program_argv[0])
737 return -EINVAL;
738
Dylan Reid837c74a2016-01-22 17:25:21 -0800739 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
740 return -errno;
741
742 c->runfs = mkdtemp(runfs_template);
743 if (!c->runfs) {
744 free(runfs_template);
745 return -errno;
746 }
Dylan Reidb3621832016-03-24 10:24:57 -0700747 /* Make sure the container uid can access the rootfs. */
Dylan Reid4c6af2e2016-06-22 18:04:24 -0700748 if (chmod(c->runfs, 0700))
Dylan Reidb3621832016-03-24 10:24:57 -0700749 goto error_rmdir;
Dylan Reid1874feb2016-06-22 17:53:50 -0700750 if (chown(c->runfs, config->uid, config->gid))
751 goto error_rmdir;
Dylan Reidb3621832016-03-24 10:24:57 -0700752
Luis Hector Chavez945af482016-06-03 08:39:34 -0700753 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
Dylan Reid837c74a2016-01-22 17:25:21 -0800754 goto error_rmdir;
755
Luis Hector Chavez945af482016-06-03 08:39:34 -0700756 if (mkdir(c->runfsroot, root_dir_mode))
757 goto error_rmdir;
758 if (chmod(c->runfsroot, root_dir_mode))
759 goto error_rmdir;
760
761 if (mount(rootfs, c->runfsroot, "", MS_BIND | MS_RDONLY, NULL))
Dylan Reid837c74a2016-01-22 17:25:21 -0800762 goto error_rmdir;
763
764 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700765 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -0700766 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800767
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700768 rc = do_container_mounts(c, config);
769 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -0700770 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800771
772 c->cgroup->ops->deny_all_devices(c->cgroup);
773
Dylan Reide040c6b2016-05-02 18:49:02 -0700774 for (i = 0; i < config->num_devices; i++) {
775 const struct container_device *dev = &config->devices[i];
Dylan Reid355d5e42016-04-29 16:53:31 -0700776 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800777
Dylan Reid355d5e42016-04-29 16:53:31 -0700778 if (dev->copy_minor) {
779 struct stat st_buff;
780 if (stat(dev->path, &st_buff) < 0)
Nicolas Boichatad21ace2016-06-30 15:04:29 +0800781 continue;
Dylan Reid355d5e42016-04-29 16:53:31 -0700782 /* Use the minor macro to extract the device number. */
783 minor = minor(st_buff.st_rdev);
784 }
785 if (minor >= 0) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700786 rc = container_create_device(c, dev, minor);
787 if (rc)
Dylan Reid355d5e42016-04-29 16:53:31 -0700788 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800789 }
790
791 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
Dylan Reid355d5e42016-04-29 16:53:31 -0700792 minor, dev->read_allowed,
Dylan Reid837c74a2016-01-22 17:25:21 -0800793 dev->write_allowed,
794 dev->modify_allowed, dev->type);
795 if (rc)
796 goto error_rmdir;
797 }
798
Dylan Reidd7229582016-04-27 17:08:40 -0700799 /* Potentailly run setfiles on mounts configured outside of the jail */
Dylan Reide040c6b2016-05-02 18:49:02 -0700800 for (i = 0; i < config->num_mounts; i++) {
801 const struct container_mount *mnt = &config->mounts[i];
Dylan Reidd7229582016-04-27 17:08:40 -0700802 char *dest;
803
804 if (mnt->mount_in_ns)
805 continue;
806 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
807 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700808 rc = run_setfiles_command(c, config, dest);
Dylan Reidd7229582016-04-27 17:08:40 -0700809 free(dest);
810 if (rc)
811 goto error_rmdir;
812 }
813
Chinyue Chenfac909e2016-06-24 14:17:42 +0800814 /* Setup CPU cgroup params. */
815 if (config->cpu_cgparams.shares) {
816 rc = c->cgroup->ops->set_cpu_shares(
817 c->cgroup, config->cpu_cgparams.shares);
818 if (rc)
819 goto error_rmdir;
820 }
821 if (config->cpu_cgparams.period) {
822 rc = c->cgroup->ops->set_cpu_quota(
823 c->cgroup, config->cpu_cgparams.quota);
824 if (rc)
825 goto error_rmdir;
826 rc = c->cgroup->ops->set_cpu_period(
827 c->cgroup, config->cpu_cgparams.period);
828 if (rc)
829 goto error_rmdir;
830 }
831 if (config->cpu_cgparams.rt_period) {
832 rc = c->cgroup->ops->set_cpu_rt_runtime(
833 c->cgroup, config->cpu_cgparams.rt_runtime);
834 if (rc)
835 goto error_rmdir;
836 rc = c->cgroup->ops->set_cpu_rt_period(
837 c->cgroup, config->cpu_cgparams.rt_period);
838 if (rc)
839 goto error_rmdir;
840 }
841
Dylan Reid837c74a2016-01-22 17:25:21 -0800842 /* Setup and start the container with libminijail. */
843 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0)
844 goto error_rmdir;
845 minijail_write_pid_file(c->jail, c->pid_file_path);
846 minijail_reset_signal_mask(c->jail);
847
848 /* Setup container namespaces. */
849 minijail_namespace_ipc(c->jail);
850 minijail_namespace_vfs(c->jail);
851 minijail_namespace_net(c->jail);
852 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -0800853 minijail_namespace_user(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -0700854 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800855 if (rc)
856 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700857 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800858 if (rc)
859 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800860
861 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
862 if (rc)
863 goto error_rmdir;
864
865 /* Add the cgroups configured above. */
866 rc = minijail_add_to_cgroup(c->jail, cgroup_cpu_tasks_path(c->cgroup));
867 if (rc)
868 goto error_rmdir;
869 rc = minijail_add_to_cgroup(c->jail,
870 cgroup_cpuacct_tasks_path(c->cgroup));
871 if (rc)
872 goto error_rmdir;
873 rc = minijail_add_to_cgroup(c->jail,
874 cgroup_devices_tasks_path(c->cgroup));
875 if (rc)
876 goto error_rmdir;
877 rc = minijail_add_to_cgroup(c->jail,
878 cgroup_freezer_tasks_path(c->cgroup));
879 if (rc)
880 goto error_rmdir;
881
Dylan Reide040c6b2016-05-02 18:49:02 -0700882 if (config->alt_syscall_table)
883 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800884
885 minijail_run_as_init(c->jail);
886
Dylan Reid3da683b2016-04-05 03:35:35 -0700887 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
888 minijail_skip_remount_private(c->jail);
889
Dylan Reid837c74a2016-01-22 17:25:21 -0800890 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -0700891 config->program_argv[0],
892 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -0800893 &c->init_pid, NULL, NULL,
894 NULL);
895 if (rc)
896 goto error_rmdir;
897 return 0;
898
899error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700900 if (!rc)
901 rc = -errno;
902 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800903 return rc;
904}
905
906const char *container_root(struct container *c)
907{
908 return c->runfs;
909}
910
911int container_pid(struct container *c)
912{
913 return c->init_pid;
914}
915
916static int container_teardown(struct container *c)
917{
Dylan Reid837c74a2016-01-22 17:25:21 -0800918 int ret = 0;
919
Dylan Reide040c6b2016-05-02 18:49:02 -0700920 unmount_external_mounts(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700921 if (c->runfsroot) {
922 if (umount(c->runfsroot))
923 ret = -errno;
924 if (rmdir(c->runfsroot))
925 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700926 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700927 }
928 if (c->pid_file_path) {
929 if (unlink(c->pid_file_path))
930 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700931 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700932 }
933 if (c->runfs) {
934 if (rmdir(c->runfs))
935 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700936 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700937 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800938 return ret;
939}
940
941int container_wait(struct container *c)
942{
Dylan Reidcf745c52016-04-22 10:18:03 -0700943 int rc;
944
945 do {
946 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -0700947 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -0700948
Luis Hector Chavez945af482016-06-03 08:39:34 -0700949 // If the process had already been reaped, still perform teardown.
950 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -0700951 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700952 }
Dylan Reidcf745c52016-04-22 10:18:03 -0700953 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -0800954}
955
956int container_kill(struct container *c)
957{
Luis Hector Chavez945af482016-06-03 08:39:34 -0700958 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -0800959 return -errno;
960 return container_wait(c);
961}