blob: 1b50160b588c7e154ed0d4d84fc7630f93243789 [file] [log] [blame]
/* Copyright 2016 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
10#include <malloc.h>
11#include <signal.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <sys/mount.h>
16#include <sys/stat.h>
17#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070018#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080019#include <unistd.h>
20
21#include "container_cgroup.h"
22#include "libcontainer.h"
23#include "libminijail.h"
24
/*
 * Frees |ptr| and resets it to NULL so a stale pointer can never be freed or
 * dereferenced again. |ptr| must be a modifiable lvalue; it is evaluated
 * twice, so it must not have side effects.
 */
#define FREE_AND_NULL(ptr) \
do { \
	free(ptr); \
	(ptr) = NULL; \
} while (0)
30
Luis Hector Chavez945af482016-06-03 08:39:34 -070031static int container_teardown(struct container *c);
32
/*
 * Replaces the string owned by |*dest| with a heap-allocated copy of |src|.
 *
 * The previous value of |*dest| is released only after the copy has been
 * made, so |*dest| is left untouched on failure.
 *
 * Returns 0 on success, -EINVAL if |dest| or |src| is NULL, or -ENOMEM if the
 * copy could not be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	char *copy;

	/* strdup(NULL) is undefined behavior; reject it explicitly. */
	if (!dest || !src)
		return -EINVAL;

	copy = strdup(src);
	if (!copy)
		return -ENOMEM;
	free(*dest);	/* free(NULL) is a no-op, so no guard is needed. */
	*dest = copy;
	return 0;
}
43
/*
 * Describes one filesystem to mount for the container. All strings are
 * heap-allocated copies owned by the entry.
 */
struct container_mount {
	/* Identifier supplied by the caller for this mount. */
	char *name;
	/* What to mount; relative bind sources are resolved against the rootfs. */
	char *source;
	/* Mount point path inside the container's root. */
	char *destination;
	/* Filesystem type handed to the mount call. */
	char *type;
	/* Optional mount data string, NULL if none. */
	char *data;
	/* Mount flags (MS_*). */
	int flags;
	/* Owner, group and mode applied when the destination is created. */
	int uid;
	int gid;
	int mode;
	/* True if mount should happen in new vfs ns */
	int mount_in_ns;
	/* True if target should be created if it doesn't exist */
	int create;
};
57
/* Describes one device node to create in the container and its cgroup access. */
struct container_device {
	/* 'c' or 'b' for char or block */
	char type;
	/* Path of the node, relative to the container's root. */
	char *path;
	/* Permission bits applied to the created node. */
	int fs_permissions;
	/* Device major and minor numbers. */
	int major;
	int minor;
	/* Copy the minor from existing node, ignores |minor| */
	int copy_minor;
	/* Owner and group of the created node. */
	int uid;
	int gid;
	/* Access rights passed to the device cgroup. */
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
71
/*
 * CPU cgroup parameters. A zero value means the parameter was never
 * configured; container_start() only applies groups whose key field
 * (shares, period, rt_period) is non-zero.
 */
struct container_cpu_cgroup {
	/* Relative CPU shares. */
	int shares;
	/* CFS quota and period. */
	int quota;
	int period;
	/* Realtime runtime and period. */
	int rt_runtime;
	int rt_period;
};
79
Dylan Reid837c74a2016-01-22 17:25:21 -080080/*
81 * Structure that configures how the container is run.
82 *
83 * rootfs - Path to the root of the container's filesystem.
84 * program_argv - The program to run and args, e.g. "/sbin/init".
85 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -070086 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -080087 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -070088 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -080089 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
90 * alt_syscall_table - Syscall table to use or NULL if none.
91 * mounts - Filesystems to mount in the new namespace.
92 * num_mounts - Number of above.
93 * devices - Device nodes to create.
94 * num_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -070095 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +080096 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -070097 * cgroup_parent - Parent dir for cgroup creation
98 * cgroup_owner - uid to own the created cgroups
Dylan Reid837c74a2016-01-22 17:25:21 -080099 */
100struct container_config {
101 char *rootfs;
102 char **program_argv;
103 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700104 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800105 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700106 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800107 char *gid_map;
108 char *alt_syscall_table;
109 struct container_mount *mounts;
110 size_t num_mounts;
111 struct container_device *devices;
112 size_t num_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700113 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800114 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700115 char *cgroup_parent;
116 uid_t cgroup_owner;
Dylan Reid837c74a2016-01-22 17:25:21 -0800117};
118
119struct container_config *container_config_create()
120{
121 return calloc(1, sizeof(struct container_config));
122}
123
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700124static void container_free_program_args(struct container_config *c)
125{
126 int i;
127
128 if (!c->program_argv)
129 return;
130 for (i = 0; i < c->num_args; ++i) {
131 FREE_AND_NULL(c->program_argv[i]);
132 }
133 FREE_AND_NULL(c->program_argv);
134}
135
136static void container_config_free_mount(struct container_mount *mount)
137{
138 FREE_AND_NULL(mount->name);
139 FREE_AND_NULL(mount->source);
140 FREE_AND_NULL(mount->destination);
141 FREE_AND_NULL(mount->type);
142 FREE_AND_NULL(mount->data);
143}
144
145static void container_config_free_device(struct container_device *device)
146{
147 FREE_AND_NULL(device->path);
148}
149
Dylan Reid837c74a2016-01-22 17:25:21 -0800150void container_config_destroy(struct container_config *c)
151{
152 size_t i;
153
154 if (c == NULL)
155 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700156 FREE_AND_NULL(c->rootfs);
157 container_free_program_args(c);
158 FREE_AND_NULL(c->uid_map);
159 FREE_AND_NULL(c->gid_map);
160 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800161 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700162 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800163 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700164 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800165 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700166 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800167 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700168 FREE_AND_NULL(c->devices);
169 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700170 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700171 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800172}
173
174int container_config_rootfs(struct container_config *c, const char *rootfs)
175{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700176 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800177}
178
Dylan Reid11456722016-05-02 11:24:50 -0700179const char *container_config_get_rootfs(const struct container_config *c)
180{
181 return c->rootfs;
182}
183
Dylan Reid837c74a2016-01-22 17:25:21 -0800184int container_config_program_argv(struct container_config *c,
185 char **argv, size_t num_args)
186{
187 size_t i;
188
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700189 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800190 c->num_args = num_args;
191 c->program_argv = calloc(num_args + 1, sizeof(char *));
192 if (!c->program_argv)
193 return -ENOMEM;
194 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700195 if (strdup_and_free(&c->program_argv[i], argv[i]))
196 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800197 }
198 c->program_argv[num_args] = NULL;
199 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700200
201error_free_return:
202 container_free_program_args(c);
203 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800204}
205
Dylan Reid11456722016-05-02 11:24:50 -0700206size_t container_config_get_num_program_args(const struct container_config *c)
207{
208 return c->num_args;
209}
210
211const char *container_config_get_program_arg(const struct container_config *c,
212 size_t index)
213{
214 if (index >= c->num_args)
215 return NULL;
216 return c->program_argv[index];
217}
218
Dylan Reid1874feb2016-06-22 17:53:50 -0700219void container_config_uid(struct container_config *c, uid_t uid)
220{
221 c->uid = uid;
222}
223
224uid_t container_config_get_uid(const struct container_config *c)
225{
226 return c->uid;
227}
228
Dylan Reid837c74a2016-01-22 17:25:21 -0800229int container_config_uid_map(struct container_config *c, const char *uid_map)
230{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700231 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800232}
233
Dylan Reid1874feb2016-06-22 17:53:50 -0700234void container_config_gid(struct container_config *c, gid_t gid)
235{
236 c->gid = gid;
237}
238
239gid_t container_config_get_gid(const struct container_config *c)
240{
241 return c->gid;
242}
243
Dylan Reid837c74a2016-01-22 17:25:21 -0800244int container_config_gid_map(struct container_config *c, const char *gid_map)
245{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700246 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800247}
248
249int container_config_alt_syscall_table(struct container_config *c,
250 const char *alt_syscall_table)
251{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700252 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800253}
254
255int container_config_add_mount(struct container_config *c,
256 const char *name,
257 const char *source,
258 const char *destination,
259 const char *type,
260 const char *data,
261 int flags,
262 int uid,
263 int gid,
264 int mode,
265 int mount_in_ns,
266 int create)
267{
268 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700269 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800270
271 if (name == NULL || source == NULL ||
272 destination == NULL || type == NULL)
273 return -EINVAL;
274
275 mount_ptr = realloc(c->mounts,
276 sizeof(c->mounts[0]) * (c->num_mounts + 1));
277 if (!mount_ptr)
278 return -ENOMEM;
279 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700280 current_mount = &c->mounts[c->num_mounts];
281 memset(current_mount, 0, sizeof(struct container_mount));
282
283 if (strdup_and_free(&current_mount->name, name))
284 goto error_free_return;
285 if (strdup_and_free(&current_mount->source, source))
286 goto error_free_return;
287 if (strdup_and_free(&current_mount->destination, destination))
288 goto error_free_return;
289 if (strdup_and_free(&current_mount->type, type))
290 goto error_free_return;
291 if (data && strdup_and_free(&current_mount->data, data))
292 goto error_free_return;
293 current_mount->flags = flags;
294 current_mount->uid = uid;
295 current_mount->gid = gid;
296 current_mount->mode = mode;
297 current_mount->mount_in_ns = mount_in_ns;
298 current_mount->create = create;
Dylan Reid837c74a2016-01-22 17:25:21 -0800299 ++c->num_mounts;
300 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700301
302error_free_return:
303 container_config_free_mount(current_mount);
304 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800305}
306
307int container_config_add_device(struct container_config *c,
308 char type,
309 const char *path,
310 int fs_permissions,
311 int major,
312 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700313 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800314 int uid,
315 int gid,
316 int read_allowed,
317 int write_allowed,
318 int modify_allowed)
319{
320 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700321 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800322
323 if (path == NULL)
324 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700325 /* If using a dynamic minor number, ensure that minor is -1. */
326 if (copy_minor && (minor != -1))
327 return -EINVAL;
328
Dylan Reid837c74a2016-01-22 17:25:21 -0800329 dev_ptr = realloc(c->devices,
330 sizeof(c->devices[0]) * (c->num_devices + 1));
331 if (!dev_ptr)
332 return -ENOMEM;
333 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700334 current_dev = &c->devices[c->num_devices];
335 memset(current_dev, 0, sizeof(struct container_device));
336
337 current_dev->type = type;
338 if (strdup_and_free(&current_dev->path, path))
339 goto error_free_return;
340 current_dev->fs_permissions = fs_permissions;
341 current_dev->major = major;
342 current_dev->minor = minor;
343 current_dev->copy_minor = copy_minor;
344 current_dev->uid = uid;
345 current_dev->gid = gid;
346 current_dev->read_allowed = read_allowed;
347 current_dev->write_allowed = write_allowed;
348 current_dev->modify_allowed = modify_allowed;
Dylan Reid837c74a2016-01-22 17:25:21 -0800349 ++c->num_devices;
350 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700351
352error_free_return:
353 container_config_free_device(current_dev);
354 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800355}
356
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700357int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700358 const char *setfiles_cmd)
359{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700360 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700361}
Dylan Reid837c74a2016-01-22 17:25:21 -0800362
Dylan Reid11456722016-05-02 11:24:50 -0700363const char *container_config_get_run_setfiles(const struct container_config *c)
364{
365 return c->run_setfiles;
366}
367
Chinyue Chenfac909e2016-06-24 14:17:42 +0800368int container_config_set_cpu_shares(struct container_config *c, int shares)
369{
370 /* CPU shares must be 2 or higher. */
371 if (shares < 2)
372 return -EINVAL;
373
374 c->cpu_cgparams.shares = shares;
375 return 0;
376}
377
378int container_config_set_cpu_cfs_params(struct container_config *c,
379 int quota,
380 int period)
381{
382 /*
383 * quota could be set higher than period to utilize more than one CPU.
384 * quota could also be set as -1 to indicate the cgroup does not adhere
385 * to any CPU time restrictions.
386 */
387 if (quota <= 0 && quota != -1)
388 return -EINVAL;
389 if (period <= 0)
390 return -EINVAL;
391
392 c->cpu_cgparams.quota = quota;
393 c->cpu_cgparams.period = period;
394 return 0;
395}
396
397int container_config_set_cpu_rt_params(struct container_config *c,
398 int rt_runtime,
399 int rt_period)
400{
401 /*
402 * rt_runtime could be set as 0 to prevent the cgroup from using
403 * realtime CPU.
404 */
405 if (rt_runtime < 0 || rt_runtime >= rt_period)
406 return -EINVAL;
407
408 c->cpu_cgparams.rt_runtime = rt_runtime;
409 c->cpu_cgparams.rt_period = rt_period;
410 return 0;
411}
412
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800413int container_config_get_cpu_shares(struct container_config *c)
414{
415 return c->cpu_cgparams.shares;
416}
417
418int container_config_get_cpu_quota(struct container_config *c)
419{
420 return c->cpu_cgparams.quota;
421}
422
423int container_config_get_cpu_period(struct container_config *c)
424{
425 return c->cpu_cgparams.period;
426}
427
428int container_config_get_cpu_rt_runtime(struct container_config *c)
429{
430 return c->cpu_cgparams.rt_runtime;
431}
432
433int container_config_get_cpu_rt_period(struct container_config *c)
434{
435 return c->cpu_cgparams.rt_period;
436}
437
Dylan Reid9e724af2016-07-21 09:58:07 -0700438int container_config_set_cgroup_parent(struct container_config *c,
439 const char *parent,
440 uid_t cgroup_owner)
441{
442 c->cgroup_owner = cgroup_owner;
443 return strdup_and_free(&c->cgroup_parent, parent);
444}
445
446const char *container_config_get_cgroup_parent(struct container_config *c)
447{
448 return c->cgroup_parent;
449}
450
Dylan Reid837c74a2016-01-22 17:25:21 -0800451/*
452 * Container manipulation
453 */
/* Runtime state of one container instance. */
struct container {
	/* cgroup handle created for this container. */
	struct container_cgroup *cgroup;
	/* minijail used to launch the container; NULL until started. */
	struct minijail *jail;
	/* pid of the container's init process, filled in when it is run. */
	pid_t init_pid;
	/* Temporary directory created under |rundir| by container_start(). */
	char *runfs;
	/* Directory in which per-run temporary directories are created. */
	char *rundir;
	/* "<runfs>/root", where the rootfs is bind-mounted. */
	char *runfsroot;
	/* "<runfs>/container.pid". */
	char *pid_file_path;
	/* Mounts made outside of the minijail */
	char **ext_mounts;
	size_t num_ext_mounts;
	/* Container name; used in the runfs directory name. */
	char *name;
};
466
/*
 * Creates a container named |name| that keeps its runtime files under
 * |rundir|, with no cgroup parent. Returns NULL on failure.
 */
struct container *container_new(const char *name,
				const char *rundir)
{
	const char *no_cgroup_parent = NULL;

	return container_new_with_cgroup_parent(name, rundir, no_cgroup_parent);
}
472
473struct container *container_new_with_cgroup_parent(const char *name,
474 const char *rundir,
475 const char *cgroup_parent)
476{
Dylan Reid837c74a2016-01-22 17:25:21 -0800477 struct container *c;
478
Dylan Reid837c74a2016-01-22 17:25:21 -0800479 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700480 if (!c)
481 return NULL;
Keshav Santhanam998fd7d2016-07-12 13:33:00 -0700482 c->cgroup = container_cgroup_new(name, "/sys/fs/cgroup", cgroup_parent);
Dylan Reid837c74a2016-01-22 17:25:21 -0800483 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700484 c->name = strdup(name);
485 if (!c->cgroup || !c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700486 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800487 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700488 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800489 return c;
490}
491
492void container_destroy(struct container *c)
493{
Dylan Reid684975e2016-05-02 15:44:47 -0700494 if (c->cgroup)
495 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700496 if (c->jail)
497 minijail_destroy(c->jail);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700498 FREE_AND_NULL(c->name);
499 FREE_AND_NULL(c->rundir);
500 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800501}
502
/*
 * Creates directory |path|, then applies |mode| and the |uid|/|gid| owner.
 * Returns 0 on success or -errno from the first step that fails.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* Each step runs only if the previous one succeeded. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;
	return 0;
}
513
/*
 * Opens |path| for read/write, creating it with |mode| if it does not exist,
 * and sets its owner to |uid|/|gid|.
 *
 * Returns 0 on success, or -errno from open() or fchown() on failure.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int rc = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;
	/* Capture the error now; close() below may overwrite errno. */
	if (fchown(fd, uid, gid))
		rc = -errno;
	close(fd);
	return rc;
}
527
528/* Make sure the mount target exists in the new rootfs. Create if needed and
529 * possible.
530 */
531static int setup_mount_destination(const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700532 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800533 const char *dest)
534{
535 int rc;
536 struct stat st_buf;
537
538 rc = stat(dest, &st_buf);
539 if (rc == 0) /* destination exists */
540 return 0;
541
542 /* Try to create the destination. Either make directory or touch a file
543 * depending on the source type.
544 */
Dylan Reid2149be92016-04-28 18:38:57 -0700545 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800546 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
547 return make_dir(dest, mnt->uid, mnt->gid, mnt->mode);
548
549 return touch_file(dest, mnt->uid, mnt->gid, mnt->mode);
550}
551
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700552/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700553static int run_setfiles_command(const struct container *c,
554 const struct container_config *config,
555 const char *dest)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700556{
557 int rc;
558 int status;
559 int pid;
560 char *context_path;
561
Dylan Reide040c6b2016-05-02 18:49:02 -0700562 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700563 return 0;
564
Dylan Reidb3621832016-03-24 10:24:57 -0700565 /* Really gross hack to avoid setfiles on /data, this should be removed
566 * when data isn't under /home/chronos/user where we can't access it as
567 * the android user.
568 * TODO(b/28705740) - Fix permission to the data directory.
569 */
570 if (strlen(dest) >= 5 && !strcmp(&dest[strlen(dest) - 5], "/data"))
571 return 0;
572
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700573 if (asprintf(&context_path, "%s/file_contexts",
574 c->runfsroot) < 0)
575 return -errno;
576
577 pid = fork();
578 if (pid == 0) {
579 const char *argv[] = {
Dylan Reide040c6b2016-05-02 18:49:02 -0700580 config->run_setfiles,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700581 "-r",
582 c->runfsroot,
583 context_path,
584 dest,
585 NULL,
586 };
587 const char *env[] = {
588 NULL,
589 };
590
591 execve(argv[0], (char *const*)argv, (char *const*)env);
592
593 /* Command failed to exec if execve returns. */
594 _exit(-errno);
595 }
596 free(context_path);
597 if (pid < 0)
598 return -errno;
599 do {
600 rc = waitpid(pid, &status, 0);
601 } while (rc == -1 && errno == EINTR);
602 if (rc < 0)
603 return -errno;
604 return status;
605}
606
Dylan Reide040c6b2016-05-02 18:49:02 -0700607/*
608 * Unmounts anything we mounted in this mount namespace in the opposite order
609 * that they were mounted.
610 */
611static int unmount_external_mounts(struct container *c)
612{
613 int ret = 0;
614
615 while (c->num_ext_mounts) {
616 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700617 if (!c->ext_mounts[c->num_ext_mounts])
618 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -0700619 if (umount(c->ext_mounts[c->num_ext_mounts]))
620 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700621 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -0700622 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700623 FREE_AND_NULL(c->ext_mounts);
Dylan Reide040c6b2016-05-02 18:49:02 -0700624 return ret;
625}
626
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700627static int do_container_mount(struct container *c,
628 const struct container_mount *mnt)
629{
630 char *source = NULL;
631 char *dest = NULL;
632 int rc = 0;
633
634 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
635 return -errno;
636
637 /*
638 * If it's a bind mount relative to rootfs, append source to
639 * rootfs path, otherwise source path is absolute.
640 */
641 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
642 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
643 goto error_free_return;
644 } else {
645 if (asprintf(&source, "%s", mnt->source) < 0)
646 goto error_free_return;
647 }
648
649 if (mnt->create) {
650 rc = setup_mount_destination(mnt, source, dest);
651 if (rc)
652 goto error_free_return;
653 }
654 if (mnt->mount_in_ns) {
655 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -0700656 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
657 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700658 if (rc)
659 goto error_free_return;
660 } else {
661 /* Mount this externally and unmount it on exit. */
662 if (mount(source, dest, mnt->type, mnt->flags,
663 mnt->data))
664 goto error_free_return;
665 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700666 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
667 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700668 goto error_free_return;
669 c->num_ext_mounts++;
670 }
671
672 goto exit;
673
674error_free_return:
675 if (!rc)
676 rc = -errno;
677exit:
678 free(source);
679 free(dest);
680 return rc;
681}
682
Dylan Reide040c6b2016-05-02 18:49:02 -0700683static int do_container_mounts(struct container *c,
684 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -0700685{
686 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700687 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -0700688
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700689 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -0700690 /*
691 * Allocate space to track anything we mount in our mount namespace.
692 * This over-allocates as it has space for all mounts.
693 */
694 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
695 if (!c->ext_mounts)
696 return -errno;
697
698 for (i = 0; i < config->num_mounts; ++i) {
Luis Hector Chavez3341ed62016-06-06 08:04:04 -0700699 rc = do_container_mount(c, &config->mounts[i]);
700 if (rc)
701 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -0700702 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700703
Dylan Reid7daf9982016-04-28 16:55:42 -0700704 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -0700705
706error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -0700707 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700708 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -0700709}
710
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700711static int container_create_device(const struct container *c,
712 const struct container_device *dev,
713 int minor)
714{
715 char *path = NULL;
716 int rc = 0;
717 int mode;
718
719 switch (dev->type) {
720 case 'b':
721 mode = S_IFBLK;
722 break;
723 case 'c':
724 mode = S_IFCHR;
725 break;
726 default:
727 return -EINVAL;
728 }
729 mode |= dev->fs_permissions;
730
731 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
732 goto error_free_return;
733 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
734 goto error_free_return;
735 if (chown(path, dev->uid, dev->gid))
736 goto error_free_return;
737 if (chmod(path, dev->fs_permissions))
738 goto error_free_return;
739
740 goto exit;
741
742error_free_return:
743 rc = -errno;
744exit:
745 free(path);
746 return rc;
747}
748
Dylan Reide040c6b2016-05-02 18:49:02 -0700749int container_start(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -0800750{
Dylan Reidb3621832016-03-24 10:24:57 -0700751 static const mode_t root_dir_mode = 0660;
Luis Hector Chavez945af482016-06-03 08:39:34 -0700752 int rc = 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800753 unsigned int i;
Dylan Reide040c6b2016-05-02 18:49:02 -0700754 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700755 char *runfs_template = NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800756
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700757 if (!c)
758 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -0700759 if (!config)
760 return -EINVAL;
761 if (!config->program_argv || !config->program_argv[0])
762 return -EINVAL;
763
Dylan Reid837c74a2016-01-22 17:25:21 -0800764 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
765 return -errno;
766
767 c->runfs = mkdtemp(runfs_template);
768 if (!c->runfs) {
769 free(runfs_template);
770 return -errno;
771 }
Dylan Reidb3621832016-03-24 10:24:57 -0700772 /* Make sure the container uid can access the rootfs. */
Dylan Reid4c6af2e2016-06-22 18:04:24 -0700773 if (chmod(c->runfs, 0700))
Dylan Reidb3621832016-03-24 10:24:57 -0700774 goto error_rmdir;
Dylan Reid1874feb2016-06-22 17:53:50 -0700775 if (chown(c->runfs, config->uid, config->gid))
776 goto error_rmdir;
Dylan Reidb3621832016-03-24 10:24:57 -0700777
Luis Hector Chavez945af482016-06-03 08:39:34 -0700778 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
Dylan Reid837c74a2016-01-22 17:25:21 -0800779 goto error_rmdir;
780
Luis Hector Chavez945af482016-06-03 08:39:34 -0700781 if (mkdir(c->runfsroot, root_dir_mode))
782 goto error_rmdir;
783 if (chmod(c->runfsroot, root_dir_mode))
784 goto error_rmdir;
785
786 if (mount(rootfs, c->runfsroot, "", MS_BIND | MS_RDONLY, NULL))
Dylan Reid837c74a2016-01-22 17:25:21 -0800787 goto error_rmdir;
788
789 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700790 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -0700791 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800792
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700793 rc = do_container_mounts(c, config);
794 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -0700795 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800796
797 c->cgroup->ops->deny_all_devices(c->cgroup);
798
Dylan Reide040c6b2016-05-02 18:49:02 -0700799 for (i = 0; i < config->num_devices; i++) {
800 const struct container_device *dev = &config->devices[i];
Dylan Reid355d5e42016-04-29 16:53:31 -0700801 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -0800802
Dylan Reid355d5e42016-04-29 16:53:31 -0700803 if (dev->copy_minor) {
804 struct stat st_buff;
805 if (stat(dev->path, &st_buff) < 0)
Nicolas Boichatad21ace2016-06-30 15:04:29 +0800806 continue;
Dylan Reid355d5e42016-04-29 16:53:31 -0700807 /* Use the minor macro to extract the device number. */
808 minor = minor(st_buff.st_rdev);
809 }
810 if (minor >= 0) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700811 rc = container_create_device(c, dev, minor);
812 if (rc)
Dylan Reid355d5e42016-04-29 16:53:31 -0700813 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800814 }
815
816 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
Dylan Reid355d5e42016-04-29 16:53:31 -0700817 minor, dev->read_allowed,
Dylan Reid837c74a2016-01-22 17:25:21 -0800818 dev->write_allowed,
819 dev->modify_allowed, dev->type);
820 if (rc)
821 goto error_rmdir;
822 }
823
Dylan Reidd7229582016-04-27 17:08:40 -0700824 /* Potentailly run setfiles on mounts configured outside of the jail */
Dylan Reide040c6b2016-05-02 18:49:02 -0700825 for (i = 0; i < config->num_mounts; i++) {
826 const struct container_mount *mnt = &config->mounts[i];
Dylan Reidd7229582016-04-27 17:08:40 -0700827 char *dest;
828
829 if (mnt->mount_in_ns)
830 continue;
831 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
832 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700833 rc = run_setfiles_command(c, config, dest);
Dylan Reidd7229582016-04-27 17:08:40 -0700834 free(dest);
835 if (rc)
836 goto error_rmdir;
837 }
838
Chinyue Chenfac909e2016-06-24 14:17:42 +0800839 /* Setup CPU cgroup params. */
840 if (config->cpu_cgparams.shares) {
841 rc = c->cgroup->ops->set_cpu_shares(
842 c->cgroup, config->cpu_cgparams.shares);
843 if (rc)
844 goto error_rmdir;
845 }
846 if (config->cpu_cgparams.period) {
847 rc = c->cgroup->ops->set_cpu_quota(
848 c->cgroup, config->cpu_cgparams.quota);
849 if (rc)
850 goto error_rmdir;
851 rc = c->cgroup->ops->set_cpu_period(
852 c->cgroup, config->cpu_cgparams.period);
853 if (rc)
854 goto error_rmdir;
855 }
856 if (config->cpu_cgparams.rt_period) {
857 rc = c->cgroup->ops->set_cpu_rt_runtime(
858 c->cgroup, config->cpu_cgparams.rt_runtime);
859 if (rc)
860 goto error_rmdir;
861 rc = c->cgroup->ops->set_cpu_rt_period(
862 c->cgroup, config->cpu_cgparams.rt_period);
863 if (rc)
864 goto error_rmdir;
865 }
866
Dylan Reid837c74a2016-01-22 17:25:21 -0800867 /* Setup and start the container with libminijail. */
868 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0)
869 goto error_rmdir;
870 minijail_write_pid_file(c->jail, c->pid_file_path);
871 minijail_reset_signal_mask(c->jail);
872
873 /* Setup container namespaces. */
874 minijail_namespace_ipc(c->jail);
875 minijail_namespace_vfs(c->jail);
876 minijail_namespace_net(c->jail);
877 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -0800878 minijail_namespace_user(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -0700879 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800880 if (rc)
881 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -0700882 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800883 if (rc)
884 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -0800885
886 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
887 if (rc)
888 goto error_rmdir;
889
890 /* Add the cgroups configured above. */
891 rc = minijail_add_to_cgroup(c->jail, cgroup_cpu_tasks_path(c->cgroup));
892 if (rc)
893 goto error_rmdir;
894 rc = minijail_add_to_cgroup(c->jail,
895 cgroup_cpuacct_tasks_path(c->cgroup));
896 if (rc)
897 goto error_rmdir;
898 rc = minijail_add_to_cgroup(c->jail,
899 cgroup_devices_tasks_path(c->cgroup));
900 if (rc)
901 goto error_rmdir;
902 rc = minijail_add_to_cgroup(c->jail,
903 cgroup_freezer_tasks_path(c->cgroup));
904 if (rc)
905 goto error_rmdir;
906
Dylan Reide040c6b2016-05-02 18:49:02 -0700907 if (config->alt_syscall_table)
908 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800909
910 minijail_run_as_init(c->jail);
911
Dylan Reid3da683b2016-04-05 03:35:35 -0700912 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
913 minijail_skip_remount_private(c->jail);
914
Dylan Reid837c74a2016-01-22 17:25:21 -0800915 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -0700916 config->program_argv[0],
917 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -0800918 &c->init_pid, NULL, NULL,
919 NULL);
920 if (rc)
921 goto error_rmdir;
922 return 0;
923
924error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -0700925 if (!rc)
926 rc = -errno;
927 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800928 return rc;
929}
930
931const char *container_root(struct container *c)
932{
933 return c->runfs;
934}
935
936int container_pid(struct container *c)
937{
938 return c->init_pid;
939}
940
941static int container_teardown(struct container *c)
942{
Dylan Reid837c74a2016-01-22 17:25:21 -0800943 int ret = 0;
944
Dylan Reide040c6b2016-05-02 18:49:02 -0700945 unmount_external_mounts(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700946 if (c->runfsroot) {
947 if (umount(c->runfsroot))
948 ret = -errno;
949 if (rmdir(c->runfsroot))
950 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700951 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700952 }
953 if (c->pid_file_path) {
954 if (unlink(c->pid_file_path))
955 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700956 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700957 }
958 if (c->runfs) {
959 if (rmdir(c->runfs))
960 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700961 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700962 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800963 return ret;
964}
965
966int container_wait(struct container *c)
967{
Dylan Reidcf745c52016-04-22 10:18:03 -0700968 int rc;
969
970 do {
971 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -0700972 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -0700973
Luis Hector Chavez945af482016-06-03 08:39:34 -0700974 // If the process had already been reaped, still perform teardown.
975 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -0700976 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -0700977 }
Dylan Reidcf745c52016-04-22 10:18:03 -0700978 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -0800979}
980
981int container_kill(struct container *c)
982{
Luis Hector Chavez945af482016-06-03 08:39:34 -0700983 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -0800984 return -errno;
985 return container_wait(c);
986}