blob: cdfaba1215a1f6cfd648e4ab83d5676ecec77cac [file] [log] [blame]
Dylan Reid837c74a2016-01-22 17:25:21 -08001/* Copyright 2016 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#define _GNU_SOURCE /* For asprintf */
7
8#include <errno.h>
9#include <fcntl.h>
Mike Frysinger05e594e2017-01-10 02:11:08 -050010#if USE_device_mapper
11#include <libdevmapper.h>
12#endif
Dylan Reid837c74a2016-01-22 17:25:21 -080013#include <malloc.h>
14#include <signal.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <sys/mount.h>
19#include <sys/stat.h>
20#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070021#include <sys/wait.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080022#include <unistd.h>
23
Mike Frysinger412dbd22017-01-06 01:50:34 -050024#include <linux/loop.h>
25
Dylan Reid837c74a2016-01-22 17:25:21 -080026#include "container_cgroup.h"
27#include "libcontainer.h"
28#include "libminijail.h"
29
/*
 * Free the allocation held by |ptr| and reset |ptr| to NULL, so freeing the
 * same lvalue again is harmless. |ptr| is expanded twice and must be a
 * modifiable lvalue without side effects.
 */
#define FREE_AND_NULL(ptr) \
	do { \
		free(ptr); \
		(ptr) = NULL; \
	} while (0)
35
Yusuke Sato91f11f02016-12-02 16:15:13 -080036#define MAX_NUM_SETFILES_ARGS 128
37
Mike Frysinger412dbd22017-01-06 01:50:34 -050038static const char loopdev_ctl[] = "/dev/loop-control";
Mike Frysinger05e594e2017-01-10 02:11:08 -050039#if USE_device_mapper
40static const char dm_dev_prefix[] = "/dev/mapper/";
41#endif
Mike Frysinger412dbd22017-01-06 01:50:34 -050042
Luis Hector Chavez945af482016-06-03 08:39:34 -070043static int container_teardown(struct container *c);
44
/*
 * Replace the string owned by |*dest| with a newly allocated copy of |src|.
 *
 * The copy is created before the old value is released, so |*dest| is left
 * untouched whenever an error is returned.
 *
 * Returns 0 on success, -EINVAL if |src| is NULL, or -ENOMEM if the copy
 * cannot be allocated.
 */
static int strdup_and_free(char **dest, const char *src)
{
	size_t size;
	char *copy;

	/* Duplicating a NULL string is undefined; report it instead. */
	if (!src)
		return -EINVAL;

	size = strlen(src) + 1;
	copy = malloc(size);
	if (!copy)
		return -ENOMEM;
	memcpy(copy, src, size);

	free(*dest);
	*dest = copy;
	return 0;
}
55
/*
 * Description of one filesystem to mount for the container. All strings are
 * heap copies owned by the entry (see container_config_add_mount()).
 */
struct container_mount {
	char *name;		/* Required identifier of the mount. */
	char *source;		/* Required mount source. */
	char *destination;	/* Required mount target. */
	char *type;		/* Required filesystem type. */
	char *data;		/* Optional mount data; may be NULL. */
	char *verity;		/* Optional; may be NULL. Presumably the
				 * dm-verity command line - confirm. */
	int flags;
	int uid;
	int gid;
	int mode;
	int mount_in_ns;	/* True if mount should happen in new vfs ns */
	int create;		/* True if target should be created if it
				 * doesn't exist */
	int loopback;		/* True if target should be mounted via
				 * loopback */
};
71
/*
 * Description of one device node to create for the container. |path| is a
 * heap copy owned by the entry (see container_config_add_device()).
 */
struct container_device {
	char type;		/* 'c' or 'b' for char or block */
	char *path;
	int fs_permissions;
	int major;
	int minor;
	int copy_minor;		/* Copy the minor from existing node, ignores
				 * |minor| */
	int uid;
	int gid;
	int read_allowed;
	int write_allowed;
	int modify_allowed;
};
85
/*
 * CPU cgroup parameters. Values are validated by the
 * container_config_set_cpu_*() setters before being stored here.
 */
struct container_cpu_cgroup {
	int shares;
	int quota;
	int period;
	int rt_runtime;
	int rt_period;
};
93
Dylan Reid837c74a2016-01-22 17:25:21 -080094/*
95 * Structure that configures how the container is run.
96 *
Mike Frysingerb22acdf2017-01-08 02:02:35 -050097 * config_root - Path to the root of the container itself.
Dylan Reid837c74a2016-01-22 17:25:21 -080098 * rootfs - Path to the root of the container's filesystem.
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -070099 * rootfs_mount_flags - Flags that will be passed to mount() for the rootfs.
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700100 * premounted_runfs - Path to where the container will be run.
101 * pid_file_path - Path to the file where the pid should be written.
Dylan Reid837c74a2016-01-22 17:25:21 -0800102 * program_argv - The program to run and args, e.g. "/sbin/init".
103 * num_args - Number of args in program_argv.
Dylan Reid1874feb2016-06-22 17:53:50 -0700104 * uid - The uid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800105 * uid_map - Mapping of UIDs in the container, e.g. "0 100000 1024"
Dylan Reid1874feb2016-06-22 17:53:50 -0700106 * gid - The gid the container will run as.
Dylan Reid837c74a2016-01-22 17:25:21 -0800107 * gid_map - Mapping of GIDs in the container, e.g. "0 100000 1024"
108 * alt_syscall_table - Syscall table to use or NULL if none.
109 * mounts - Filesystems to mount in the new namespace.
110 * num_mounts - Number of above.
111 * devices - Device nodes to create.
112 * num_devices - Number of above.
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700113 * run_setfiles - Should run setfiles on mounts to enable selinux.
Chinyue Chenfac909e2016-06-24 14:17:42 +0800114 * cpu_cgparams - CPU cgroup params.
Dylan Reid9e724af2016-07-21 09:58:07 -0700115 * cgroup_parent - Parent dir for cgroup creation
116 * cgroup_owner - uid to own the created cgroups
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700117 * cgroup_group - gid to own the created cgroups
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700118 * share_host_netns - Enable sharing of the host network namespace.
Dylan Reidc4335842016-11-11 10:24:52 -0800119 * keep_fds_open - Allow the child process to keep open FDs (for stdin/out/err).
Dylan Reid837c74a2016-01-22 17:25:21 -0800120 */
121struct container_config {
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500122 char *config_root;
Dylan Reid837c74a2016-01-22 17:25:21 -0800123 char *rootfs;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700124 unsigned long rootfs_mount_flags;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700125 char *premounted_runfs;
126 char *pid_file_path;
Dylan Reid837c74a2016-01-22 17:25:21 -0800127 char **program_argv;
128 size_t num_args;
Dylan Reid1874feb2016-06-22 17:53:50 -0700129 uid_t uid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800130 char *uid_map;
Dylan Reid1874feb2016-06-22 17:53:50 -0700131 gid_t gid;
Dylan Reid837c74a2016-01-22 17:25:21 -0800132 char *gid_map;
133 char *alt_syscall_table;
134 struct container_mount *mounts;
135 size_t num_mounts;
136 struct container_device *devices;
137 size_t num_devices;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700138 char *run_setfiles;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800139 struct container_cpu_cgroup cpu_cgparams;
Dylan Reid9e724af2016-07-21 09:58:07 -0700140 char *cgroup_parent;
141 uid_t cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700142 gid_t cgroup_group;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700143 int share_host_netns;
Dylan Reidc4335842016-11-11 10:24:52 -0800144 int keep_fds_open;
Dylan Reid837c74a2016-01-22 17:25:21 -0800145};
146
147struct container_config *container_config_create()
148{
149 return calloc(1, sizeof(struct container_config));
150}
151
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700152static void container_free_program_args(struct container_config *c)
153{
154 int i;
155
156 if (!c->program_argv)
157 return;
158 for (i = 0; i < c->num_args; ++i) {
159 FREE_AND_NULL(c->program_argv[i]);
160 }
161 FREE_AND_NULL(c->program_argv);
162}
163
164static void container_config_free_mount(struct container_mount *mount)
165{
166 FREE_AND_NULL(mount->name);
167 FREE_AND_NULL(mount->source);
168 FREE_AND_NULL(mount->destination);
169 FREE_AND_NULL(mount->type);
170 FREE_AND_NULL(mount->data);
171}
172
173static void container_config_free_device(struct container_device *device)
174{
175 FREE_AND_NULL(device->path);
176}
177
Dylan Reid837c74a2016-01-22 17:25:21 -0800178void container_config_destroy(struct container_config *c)
179{
180 size_t i;
181
182 if (c == NULL)
183 return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700184 FREE_AND_NULL(c->rootfs);
185 container_free_program_args(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700186 FREE_AND_NULL(c->premounted_runfs);
187 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700188 FREE_AND_NULL(c->uid_map);
189 FREE_AND_NULL(c->gid_map);
190 FREE_AND_NULL(c->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800191 for (i = 0; i < c->num_mounts; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700192 container_config_free_mount(&c->mounts[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800193 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700194 FREE_AND_NULL(c->mounts);
Dylan Reid837c74a2016-01-22 17:25:21 -0800195 for (i = 0; i < c->num_devices; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700196 container_config_free_device(&c->devices[i]);
Dylan Reid837c74a2016-01-22 17:25:21 -0800197 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700198 FREE_AND_NULL(c->devices);
199 FREE_AND_NULL(c->run_setfiles);
Dylan Reid9e724af2016-07-21 09:58:07 -0700200 FREE_AND_NULL(c->cgroup_parent);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700201 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800202}
203
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500204int container_config_config_root(struct container_config *c,
205 const char *config_root)
206{
207 return strdup_and_free(&c->config_root, config_root);
208}
209
210const char *container_config_get_config_root(const struct container_config *c)
211{
212 return c->config_root;
213}
214
Dylan Reid837c74a2016-01-22 17:25:21 -0800215int container_config_rootfs(struct container_config *c, const char *rootfs)
216{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700217 return strdup_and_free(&c->rootfs, rootfs);
Dylan Reid837c74a2016-01-22 17:25:21 -0800218}
219
Dylan Reid11456722016-05-02 11:24:50 -0700220const char *container_config_get_rootfs(const struct container_config *c)
221{
222 return c->rootfs;
223}
224
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700225void container_config_rootfs_mount_flags(struct container_config *c,
226 unsigned long rootfs_mount_flags)
227{
228 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
229 * simply check against zero later. MS_BIND is also added to avoid
230 * re-mounting the original filesystem, since the rootfs is always
231 * bind-mounted.
232 */
233 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
234}
235
236unsigned long container_config_get_rootfs_mount_flags(
237 const struct container_config *c)
238{
239 return c->rootfs_mount_flags;
240}
241
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700242int container_config_premounted_runfs(struct container_config *c, const char *runfs)
243{
244 return strdup_and_free(&c->premounted_runfs, runfs);
245}
246
247const char *container_config_get_premounted_runfs(const struct container_config *c)
248{
249 return c->premounted_runfs;
250}
251
252int container_config_pid_file(struct container_config *c, const char *path)
253{
254 return strdup_and_free(&c->pid_file_path, path);
255}
256
257const char *container_config_get_pid_file(const struct container_config *c)
258{
259 return c->pid_file_path;
260}
261
Dylan Reid837c74a2016-01-22 17:25:21 -0800262int container_config_program_argv(struct container_config *c,
Dylan Reid17fd53f2016-11-18 19:14:41 -0800263 const char **argv, size_t num_args)
Dylan Reid837c74a2016-01-22 17:25:21 -0800264{
265 size_t i;
266
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700267 container_free_program_args(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800268 c->num_args = num_args;
269 c->program_argv = calloc(num_args + 1, sizeof(char *));
270 if (!c->program_argv)
271 return -ENOMEM;
272 for (i = 0; i < num_args; ++i) {
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700273 if (strdup_and_free(&c->program_argv[i], argv[i]))
274 goto error_free_return;
Dylan Reid837c74a2016-01-22 17:25:21 -0800275 }
276 c->program_argv[num_args] = NULL;
277 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700278
279error_free_return:
280 container_free_program_args(c);
281 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800282}
283
Dylan Reid11456722016-05-02 11:24:50 -0700284size_t container_config_get_num_program_args(const struct container_config *c)
285{
286 return c->num_args;
287}
288
289const char *container_config_get_program_arg(const struct container_config *c,
290 size_t index)
291{
292 if (index >= c->num_args)
293 return NULL;
294 return c->program_argv[index];
295}
296
Dylan Reid1874feb2016-06-22 17:53:50 -0700297void container_config_uid(struct container_config *c, uid_t uid)
298{
299 c->uid = uid;
300}
301
302uid_t container_config_get_uid(const struct container_config *c)
303{
304 return c->uid;
305}
306
Dylan Reid837c74a2016-01-22 17:25:21 -0800307int container_config_uid_map(struct container_config *c, const char *uid_map)
308{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700309 return strdup_and_free(&c->uid_map, uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800310}
311
Dylan Reid1874feb2016-06-22 17:53:50 -0700312void container_config_gid(struct container_config *c, gid_t gid)
313{
314 c->gid = gid;
315}
316
317gid_t container_config_get_gid(const struct container_config *c)
318{
319 return c->gid;
320}
321
Dylan Reid837c74a2016-01-22 17:25:21 -0800322int container_config_gid_map(struct container_config *c, const char *gid_map)
323{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700324 return strdup_and_free(&c->gid_map, gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -0800325}
326
327int container_config_alt_syscall_table(struct container_config *c,
328 const char *alt_syscall_table)
329{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700330 return strdup_and_free(&c->alt_syscall_table, alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -0800331}
332
333int container_config_add_mount(struct container_config *c,
334 const char *name,
335 const char *source,
336 const char *destination,
337 const char *type,
338 const char *data,
Mike Frysinger05e594e2017-01-10 02:11:08 -0500339 const char *verity,
Dylan Reid837c74a2016-01-22 17:25:21 -0800340 int flags,
341 int uid,
342 int gid,
343 int mode,
344 int mount_in_ns,
Mike Frysinger412dbd22017-01-06 01:50:34 -0500345 int create,
346 int loopback)
Dylan Reid837c74a2016-01-22 17:25:21 -0800347{
348 struct container_mount *mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700349 struct container_mount *current_mount;
Dylan Reid837c74a2016-01-22 17:25:21 -0800350
351 if (name == NULL || source == NULL ||
352 destination == NULL || type == NULL)
353 return -EINVAL;
354
355 mount_ptr = realloc(c->mounts,
356 sizeof(c->mounts[0]) * (c->num_mounts + 1));
357 if (!mount_ptr)
358 return -ENOMEM;
359 c->mounts = mount_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700360 current_mount = &c->mounts[c->num_mounts];
361 memset(current_mount, 0, sizeof(struct container_mount));
362
363 if (strdup_and_free(&current_mount->name, name))
364 goto error_free_return;
365 if (strdup_and_free(&current_mount->source, source))
366 goto error_free_return;
367 if (strdup_and_free(&current_mount->destination, destination))
368 goto error_free_return;
369 if (strdup_and_free(&current_mount->type, type))
370 goto error_free_return;
371 if (data && strdup_and_free(&current_mount->data, data))
372 goto error_free_return;
Mike Frysinger05e594e2017-01-10 02:11:08 -0500373 if (verity && strdup_and_free(&current_mount->verity, verity))
374 goto error_free_return;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700375 current_mount->flags = flags;
376 current_mount->uid = uid;
377 current_mount->gid = gid;
378 current_mount->mode = mode;
379 current_mount->mount_in_ns = mount_in_ns;
380 current_mount->create = create;
Mike Frysinger412dbd22017-01-06 01:50:34 -0500381 current_mount->loopback = loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -0800382 ++c->num_mounts;
383 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700384
385error_free_return:
386 container_config_free_mount(current_mount);
387 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800388}
389
390int container_config_add_device(struct container_config *c,
391 char type,
392 const char *path,
393 int fs_permissions,
394 int major,
395 int minor,
Dylan Reid355d5e42016-04-29 16:53:31 -0700396 int copy_minor,
Dylan Reid837c74a2016-01-22 17:25:21 -0800397 int uid,
398 int gid,
399 int read_allowed,
400 int write_allowed,
401 int modify_allowed)
402{
403 struct container_device *dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700404 struct container_device *current_dev;
Dylan Reid837c74a2016-01-22 17:25:21 -0800405
406 if (path == NULL)
407 return -EINVAL;
Dylan Reid355d5e42016-04-29 16:53:31 -0700408 /* If using a dynamic minor number, ensure that minor is -1. */
409 if (copy_minor && (minor != -1))
410 return -EINVAL;
411
Dylan Reid837c74a2016-01-22 17:25:21 -0800412 dev_ptr = realloc(c->devices,
413 sizeof(c->devices[0]) * (c->num_devices + 1));
414 if (!dev_ptr)
415 return -ENOMEM;
416 c->devices = dev_ptr;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700417 current_dev = &c->devices[c->num_devices];
418 memset(current_dev, 0, sizeof(struct container_device));
419
420 current_dev->type = type;
421 if (strdup_and_free(&current_dev->path, path))
422 goto error_free_return;
423 current_dev->fs_permissions = fs_permissions;
424 current_dev->major = major;
425 current_dev->minor = minor;
426 current_dev->copy_minor = copy_minor;
427 current_dev->uid = uid;
428 current_dev->gid = gid;
429 current_dev->read_allowed = read_allowed;
430 current_dev->write_allowed = write_allowed;
431 current_dev->modify_allowed = modify_allowed;
Dylan Reid837c74a2016-01-22 17:25:21 -0800432 ++c->num_devices;
433 return 0;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700434
435error_free_return:
436 container_config_free_device(current_dev);
437 return -ENOMEM;
Dylan Reid837c74a2016-01-22 17:25:21 -0800438}
439
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700440int container_config_run_setfiles(struct container_config *c,
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700441 const char *setfiles_cmd)
442{
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700443 return strdup_and_free(&c->run_setfiles, setfiles_cmd);
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700444}
Dylan Reid837c74a2016-01-22 17:25:21 -0800445
Dylan Reid11456722016-05-02 11:24:50 -0700446const char *container_config_get_run_setfiles(const struct container_config *c)
447{
448 return c->run_setfiles;
449}
450
Chinyue Chenfac909e2016-06-24 14:17:42 +0800451int container_config_set_cpu_shares(struct container_config *c, int shares)
452{
453 /* CPU shares must be 2 or higher. */
454 if (shares < 2)
455 return -EINVAL;
456
457 c->cpu_cgparams.shares = shares;
458 return 0;
459}
460
461int container_config_set_cpu_cfs_params(struct container_config *c,
462 int quota,
463 int period)
464{
465 /*
466 * quota could be set higher than period to utilize more than one CPU.
467 * quota could also be set as -1 to indicate the cgroup does not adhere
468 * to any CPU time restrictions.
469 */
470 if (quota <= 0 && quota != -1)
471 return -EINVAL;
472 if (period <= 0)
473 return -EINVAL;
474
475 c->cpu_cgparams.quota = quota;
476 c->cpu_cgparams.period = period;
477 return 0;
478}
479
480int container_config_set_cpu_rt_params(struct container_config *c,
481 int rt_runtime,
482 int rt_period)
483{
484 /*
485 * rt_runtime could be set as 0 to prevent the cgroup from using
486 * realtime CPU.
487 */
488 if (rt_runtime < 0 || rt_runtime >= rt_period)
489 return -EINVAL;
490
491 c->cpu_cgparams.rt_runtime = rt_runtime;
492 c->cpu_cgparams.rt_period = rt_period;
493 return 0;
494}
495
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800496int container_config_get_cpu_shares(struct container_config *c)
497{
498 return c->cpu_cgparams.shares;
499}
500
501int container_config_get_cpu_quota(struct container_config *c)
502{
503 return c->cpu_cgparams.quota;
504}
505
506int container_config_get_cpu_period(struct container_config *c)
507{
508 return c->cpu_cgparams.period;
509}
510
511int container_config_get_cpu_rt_runtime(struct container_config *c)
512{
513 return c->cpu_cgparams.rt_runtime;
514}
515
516int container_config_get_cpu_rt_period(struct container_config *c)
517{
518 return c->cpu_cgparams.rt_period;
519}
520
Dylan Reid9e724af2016-07-21 09:58:07 -0700521int container_config_set_cgroup_parent(struct container_config *c,
522 const char *parent,
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700523 uid_t cgroup_owner, gid_t cgroup_group)
Dylan Reid9e724af2016-07-21 09:58:07 -0700524{
525 c->cgroup_owner = cgroup_owner;
Dmitry Torokhov14eef722016-09-27 16:40:37 -0700526 c->cgroup_group = cgroup_group;
Dylan Reid9e724af2016-07-21 09:58:07 -0700527 return strdup_and_free(&c->cgroup_parent, parent);
528}
529
530const char *container_config_get_cgroup_parent(struct container_config *c)
531{
532 return c->cgroup_parent;
533}
534
Keshav Santhanam1b6bf672016-08-10 18:35:12 -0700535void container_config_share_host_netns(struct container_config *c)
536{
537 c->share_host_netns = 1;
538}
539
540int get_container_config_share_host_netns(struct container_config *c)
541{
542 return c->share_host_netns;
543}
544
Dylan Reidc4335842016-11-11 10:24:52 -0800545void container_config_keep_fds_open(struct container_config *c)
546{
547 c->keep_fds_open = 1;
548}
549
/*
 * Container manipulation
 */

/*
 * Runtime state of one container. Created by container_new() and released
 * by container_destroy().
 */
struct container {
	struct container_cgroup *cgroup;
	struct minijail *jail;
	pid_t init_pid;
	char *config_root;
	char *runfs;
	char *rundir;		/* Copy of the rundir given to container_new() */
	char *runfsroot;	/* Root handed to the setfiles command */
	char *pid_file_path;
	char **ext_mounts;	/* Mounts made outside of the minijail */
	size_t num_ext_mounts;
	char **loopdevs;	/* Loop devices to detach on unmount */
	size_t num_loopdevs;
	char **device_mappers;	/* Device mapper targets to remove on unmount */
	size_t num_device_mappers;
	char *name;		/* Copy of the name given to container_new() */
};
570
571struct container *container_new(const char *name,
Dylan Reide040c6b2016-05-02 18:49:02 -0700572 const char *rundir)
Dylan Reid837c74a2016-01-22 17:25:21 -0800573{
574 struct container *c;
575
Dylan Reid837c74a2016-01-22 17:25:21 -0800576 c = calloc(1, sizeof(*c));
Dylan Reidb435c682016-04-12 04:17:49 -0700577 if (!c)
578 return NULL;
Dylan Reid837c74a2016-01-22 17:25:21 -0800579 c->rundir = strdup(rundir);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700580 c->name = strdup(name);
Dylan Reida9966422016-07-21 10:11:34 -0700581 if (!c->rundir || !c->name) {
Dylan Reid684975e2016-05-02 15:44:47 -0700582 container_destroy(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800583 return NULL;
Dylan Reidb435c682016-04-12 04:17:49 -0700584 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800585 return c;
586}
587
588void container_destroy(struct container *c)
589{
Dylan Reid684975e2016-05-02 15:44:47 -0700590 if (c->cgroup)
591 container_cgroup_destroy(c->cgroup);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -0700592 if (c->jail)
593 minijail_destroy(c->jail);
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500594 FREE_AND_NULL(c->config_root);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700595 FREE_AND_NULL(c->name);
596 FREE_AND_NULL(c->rundir);
597 FREE_AND_NULL(c);
Dylan Reid837c74a2016-01-22 17:25:21 -0800598}
599
/*
 * Given a uid/gid map of "inside1 outside1 length1, ...", and an id
 * inside of the user namespace, return the equivalent outside id, or
 * return < 0 on error.
 *
 * Each entry maps the ids [inside, inside + length) to
 * [outside, outside + length). The map is parsed in place without being
 * modified. Text following the three numbers of an entry is ignored up to
 * the next ','.
 *
 * Returns -EINVAL if |map| is NULL, an entry is malformed or has a negative
 * field, or no entry covers |id|; -ERANGE if a number or the mapped id does
 * not fit.
 */
static int get_userns_outside_id(const char *map, int id)
{
	const long long int_max = (long long)(~0U >> 1);
	const char *cursor = map;

	if (!map)
		return -EINVAL;

	for (;;) {
		long fields[3];	/* inside, outside, length */
		size_t i;

		/* Step over entry separators, tolerating empty entries. */
		while (*cursor == ',')
			cursor++;
		if (*cursor == '\0')
			break;

		for (i = 0; i < 3; ++i) {
			char *end;

			errno = 0;
			fields[i] = strtol(cursor, &end, 10);
			if (errno)
				return -errno;
			/* No digits consumed means a missing field. */
			if (end == cursor || fields[i] < 0)
				return -EINVAL;
			if (fields[i] > int_max)
				return -ERANGE;
			cursor = end;
		}

		/* The range is half open: inside + length is not mapped. */
		if (id >= fields[0] && id - fields[0] < fields[2]) {
			long long mapped = (long long)(id - fields[0]) + fields[1];

			if (mapped > int_max)
				return -ERANGE;
			return (int)mapped;
		}

		/* Skip whatever is left of this entry. */
		cursor = strchr(cursor, ',');
		if (!cursor)
			break;
	}

	return -EINVAL;
}
642
/*
 * Create the directory |path|, then explicitly apply |mode| and hand it to
 * |uid|:|gid|.
 *
 * Returns 0 on success or the negative errno of the first step that fails;
 * later steps are not attempted.
 */
static int make_dir(const char *path, int uid, int gid, int mode)
{
	/* Short-circuit evaluation keeps errno from the failing call. */
	if (mkdir(path, mode) != 0 ||
	    chmod(path, mode) != 0 ||
	    chown(path, uid, gid) != 0)
		return -errno;

	return 0;
}
653
/*
 * Make sure the file |path| exists (creating it with |mode| if necessary)
 * and hand it to |uid|:|gid|.
 *
 * Returns 0 on success or the negative errno of the open() or fchown()
 * call that failed.
 */
static int touch_file(const char *path, int uid, int gid, int mode)
{
	int ret = 0;
	int fd = open(path, O_RDWR | O_CREAT, mode);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to overwrite it. */
	if (fchown(fd, uid, gid))
		ret = -errno;
	close(fd);

	return ret;
}
667
668/* Make sure the mount target exists in the new rootfs. Create if needed and
669 * possible.
670 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800671static int setup_mount_destination(const struct container_config *config,
672 const struct container_mount *mnt,
Dylan Reid2149be92016-04-28 18:38:57 -0700673 const char *source,
Dylan Reid837c74a2016-01-22 17:25:21 -0800674 const char *dest)
675{
Stephen Barber1a398c72017-01-23 12:39:44 -0800676 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -0800677 int rc;
678 struct stat st_buf;
679
680 rc = stat(dest, &st_buf);
681 if (rc == 0) /* destination exists */
682 return 0;
683
684 /* Try to create the destination. Either make directory or touch a file
685 * depending on the source type.
686 */
Stephen Barber1a398c72017-01-23 12:39:44 -0800687 uid_userns = get_userns_outside_id(config->uid_map, mnt->uid);
688 if (uid_userns < 0)
689 return uid_userns;
690 gid_userns = get_userns_outside_id(config->gid_map, mnt->gid);
691 if (gid_userns < 0)
692 return gid_userns;
693
Dylan Reid2149be92016-04-28 18:38:57 -0700694 rc = stat(source, &st_buf);
Dylan Reid837c74a2016-01-22 17:25:21 -0800695 if (rc || S_ISDIR(st_buf.st_mode) || S_ISBLK(st_buf.st_mode))
Stephen Barber1a398c72017-01-23 12:39:44 -0800696 return make_dir(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800697
Stephen Barber1a398c72017-01-23 12:39:44 -0800698 return touch_file(dest, uid_userns, gid_userns, mnt->mode);
Dylan Reid837c74a2016-01-22 17:25:21 -0800699}
700
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700701/* Fork and exec the setfiles command to configure the selinux policy. */
Dylan Reide040c6b2016-05-02 18:49:02 -0700702static int run_setfiles_command(const struct container *c,
703 const struct container_config *config,
Yusuke Sato91f11f02016-12-02 16:15:13 -0800704 char *const *destinations, size_t num_destinations)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700705{
706 int rc;
707 int status;
708 int pid;
709 char *context_path;
710
Dylan Reide040c6b2016-05-02 18:49:02 -0700711 if (!config->run_setfiles)
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700712 return 0;
713
714 if (asprintf(&context_path, "%s/file_contexts",
715 c->runfsroot) < 0)
716 return -errno;
717
718 pid = fork();
719 if (pid == 0) {
Yusuke Sato91f11f02016-12-02 16:15:13 -0800720 size_t i;
721 size_t arg_index = 0;
722 const char *argv[MAX_NUM_SETFILES_ARGS];
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700723 const char *env[] = {
724 NULL,
725 };
726
Yusuke Sato91f11f02016-12-02 16:15:13 -0800727 argv[arg_index++] = config->run_setfiles;
728 argv[arg_index++] = "-r";
729 argv[arg_index++] = c->runfsroot;
730 argv[arg_index++] = context_path;
731 if (arg_index + num_destinations >= MAX_NUM_SETFILES_ARGS)
732 _exit(-E2BIG);
733 for (i = 0; i < num_destinations; ++i) {
734 argv[arg_index++] = destinations[i];
735 }
736 argv[arg_index] = NULL;
737
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700738 execve(argv[0], (char *const*)argv, (char *const*)env);
739
740 /* Command failed to exec if execve returns. */
741 _exit(-errno);
742 }
743 free(context_path);
744 if (pid < 0)
745 return -errno;
746 do {
747 rc = waitpid(pid, &status, 0);
748 } while (rc == -1 && errno == EINTR);
749 if (rc < 0)
750 return -errno;
751 return status;
752}
753
Mike Frysinger412dbd22017-01-06 01:50:34 -0500754/* Find a free loop device and attach it. */
755static int loopdev_setup(char **loopdev_ret, const char *source)
756{
757 int ret = 0;
758 int source_fd = -1;
759 int control_fd = -1;
760 int loop_fd = -1;
761 char *loopdev = NULL;
762
763 source_fd = open(source, O_RDONLY|O_CLOEXEC);
764 if (source_fd < 0)
765 goto error;
766
767 control_fd = open(loopdev_ctl, O_RDWR|O_NOFOLLOW|O_CLOEXEC);
768 if (control_fd < 0)
769 goto error;
770
771 while (1) {
772 int num = ioctl(control_fd, LOOP_CTL_GET_FREE);
773 if (num < 0)
774 goto error;
775
776 if (asprintf(&loopdev, "/dev/loop%i", num) < 0)
777 goto error;
778
779 loop_fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
780 if (loop_fd < 0)
781 goto error;
782
783 if (ioctl(loop_fd, LOOP_SET_FD, source_fd) == 0)
784 break;
785
786 if (errno != EBUSY)
787 goto error;
788
789 /* Clean up resources for the next pass. */
790 free(loopdev);
791 close(loop_fd);
792 }
793
794 *loopdev_ret = loopdev;
795 goto exit;
796
797error:
798 ret = -errno;
799 free(loopdev);
800exit:
801 if (source_fd != -1)
802 close(source_fd);
803 if (control_fd != -1)
804 close(control_fd);
805 if (loop_fd != -1)
806 close(loop_fd);
807 return ret;
808}
809
810/* Detach the specified loop device. */
811static int loopdev_detach(const char *loopdev)
812{
813 int ret = 0;
814 int fd;
815
816 fd = open(loopdev, O_RDONLY|O_NOFOLLOW|O_CLOEXEC);
817 if (fd < 0)
818 goto error;
819 if (ioctl(fd, LOOP_CLR_FD) < 0)
820 goto error;
821
822 goto exit;
823
824error:
825 ret = -errno;
826exit:
827 if (fd != -1)
828 close(fd);
829 return ret;
830}
831
/*
 * Create a new read-only device mapper target for the source.
 *
 * The target name is "cros-containers-" followed by |source| with every '/'
 * replaced by '_'. Every "@DEV@" in |verity_cmdline| is replaced by
 * |source|; the first three fields of the result (start, size, target type)
 * are parsed out and the remainder is passed as the target parameters.
 *
 * On success returns 0 and stores heap-allocated strings in |*dm_path_ret|
 * (the /dev/mapper/ node) and |*dm_name_ret| (the target name); the caller
 * frees both. On failure returns a negative errno value (-EIO when the
 * device mapper library reports a failure without setting errno) and leaves
 * both outputs untouched.
 *
 * When built without USE_device_mapper this does nothing and returns 0.
 */
static int dm_setup(char **dm_path_ret, char **dm_name_ret, const char *source,
		    const char *verity_cmdline)
{
	int ret = 0;
#if USE_device_mapper
	static const char placeholder[] = "@DEV@";
	const size_t placeholder_len = sizeof(placeholder) - 1;
	const size_t source_len = strlen(source);
	const char *in;
	const char *hit;
	char *p;
	char *out;
	char *dm_path = NULL;
	char *dm_name = NULL;
	char *verity = NULL;
	struct dm_task *dmt = NULL;
	uint32_t cookie = 0;
	size_t num_placeholders = 0;
	char ttype[20];
	unsigned long long start, size;
	int n;

	/* Normalize the name into something unique-esque. */
	if (asprintf(&dm_name, "cros-containers-%s", source) < 0) {
		/* The pointer is undefined after a failed asprintf. */
		dm_name = NULL;
		errno = ENOMEM;
		goto error;
	}
	for (p = dm_name; (p = strchr(p, '/')) != NULL; ++p)
		*p = '_';

	/* Get the /dev path for the higher levels to mount. */
	if (asprintf(&dm_path, "%s%s", dm_dev_prefix, dm_name) < 0) {
		dm_path = NULL;
		errno = ENOMEM;
		goto error;
	}

	/*
	 * Insert the source path in the verity command line. Count the
	 * placeholders first so the buffer is large enough for any number
	 * of them, then build the result in a single left-to-right pass so
	 * inserted text is never rescanned.
	 */
	for (in = verity_cmdline; (hit = strstr(in, placeholder)) != NULL;
	     in = hit + placeholder_len)
		num_placeholders++;
	verity = malloc(strlen(verity_cmdline) +
			num_placeholders * source_len + 1);
	if (!verity) {
		errno = ENOMEM;
		goto error;
	}
	out = verity;
	in = verity_cmdline;
	while ((hit = strstr(in, placeholder)) != NULL) {
		size_t prefix_len = (size_t)(hit - in);

		memcpy(out, in, prefix_len);
		out += prefix_len;
		memcpy(out, source, source_len);
		out += source_len;
		in = hit + placeholder_len;
	}
	strcpy(out, in);

	/* Extract the first three parameters for dm-verity settings. */
	if (sscanf(verity, "%llu %llu %10s %n", &start, &size, ttype, &n) != 3) {
		errno = EINVAL;
		goto error;
	}

	/*
	 * Finally create the device mapper. Clear errno first so the error
	 * path can tell whether the library reported a cause.
	 */
	errno = 0;
	dmt = dm_task_create(DM_DEVICE_CREATE);
	if (dmt == NULL)
		goto error;

	if (!dm_task_set_name(dmt, dm_name))
		goto error;

	if (!dm_task_set_ro(dmt))
		goto error;

	if (!dm_task_add_target(dmt, start, size, ttype, verity + n))
		goto error;

	if (!dm_task_set_cookie(dmt, &cookie, 0))
		goto error;

	if (!dm_task_run(dmt))
		goto error;

	/* Make sure the node exists before we continue. */
	dm_udev_wait(cookie);

	*dm_path_ret = dm_path;
	*dm_name_ret = dm_name;
	goto exit;

error:
	/* Never report success from the failure path. */
	ret = errno ? -errno : -EIO;
	free(dm_name);
	free(dm_path);
exit:
	free(verity);
	if (dmt)
		dm_task_destroy(dmt);
#endif
	return ret;
}
910
/*
 * Tear down the device mapper target named |dm_name|.
 *
 * Returns 0 on success and a negative errno-style value on failure.  When
 * built without device mapper support this is a no-op that returns 0.
 */
static int dm_detach(const char *dm_name)
{
	int ret = 0;
#if USE_device_mapper
	struct dm_task *dmt;

	/*
	 * libdevmapper reports failure through return values only; clear
	 * errno so a stale value is not mistaken for the cause.
	 */
	errno = 0;
	dmt = dm_task_create(DM_DEVICE_REMOVE);
	if (dmt == NULL) {
		/* Nothing to destroy: do not hand NULL to dm_task_destroy(). */
		return errno ? -errno : -ENOMEM;
	}

	if (!dm_task_set_name(dmt, dm_name) || !dm_task_run(dmt))
		ret = errno ? -errno : -EIO;

	dm_task_destroy(dmt);
#else
	(void)dm_name;
#endif
	return ret;
}
937
Dylan Reide040c6b2016-05-02 18:49:02 -0700938/*
939 * Unmounts anything we mounted in this mount namespace in the opposite order
940 * that they were mounted.
941 */
942static int unmount_external_mounts(struct container *c)
943{
944 int ret = 0;
945
946 while (c->num_ext_mounts) {
947 c->num_ext_mounts--;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700948 if (!c->ext_mounts[c->num_ext_mounts])
949 continue;
Dylan Reide040c6b2016-05-02 18:49:02 -0700950 if (umount(c->ext_mounts[c->num_ext_mounts]))
951 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700952 FREE_AND_NULL(c->ext_mounts[c->num_ext_mounts]);
Dylan Reide040c6b2016-05-02 18:49:02 -0700953 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700954 FREE_AND_NULL(c->ext_mounts);
Mike Frysinger412dbd22017-01-06 01:50:34 -0500955
956 while (c->num_loopdevs) {
957 c->num_loopdevs--;
958 if (loopdev_detach(c->loopdevs[c->num_loopdevs]))
959 ret = -errno;
960 FREE_AND_NULL(c->loopdevs[c->num_loopdevs]);
961 }
962 FREE_AND_NULL(c->loopdevs);
963
Mike Frysinger05e594e2017-01-10 02:11:08 -0500964 while (c->num_device_mappers) {
965 c->num_device_mappers--;
966 if (dm_detach(c->device_mappers[c->num_device_mappers]))
967 ret = -errno;
968 FREE_AND_NULL(c->device_mappers[c->num_device_mappers]);
969 }
970 FREE_AND_NULL(c->device_mappers);
971
Dylan Reide040c6b2016-05-02 18:49:02 -0700972 return ret;
973}
974
/*
 * Match mount_one in minijail, mount one mountpoint with
 * consideration for combination of MS_BIND/MS_RDONLY flag.
 *
 * Returns 0 on success.  On failure returns -1 with errno set by the failing
 * mount() call, and nothing is left mounted on |dest| by this function.
 */
static int mount_external(const char *src, const char *dest, const char *type,
			  unsigned long flags, const void *data)
{
	int remount_ro = 0;

	/*
	 * R/O bind mounts have to be remounted since 'bind' and 'ro'
	 * can't both be specified in the original bind mount.
	 * Remount R/O after the initial mount.
	 */
	if ((flags & MS_BIND) && (flags & MS_RDONLY)) {
		remount_ro = 1;
		flags &= ~MS_RDONLY;
	}

	if (mount(src, dest, type, flags, data) == -1)
		return -1;

	if (remount_ro) {
		flags |= MS_RDONLY;
		if (mount(src, dest, NULL, flags | MS_REMOUNT, data) == -1) {
			/*
			 * The caller only tracks mounts that succeeded, so
			 * undo the writable bind mount made above instead of
			 * leaving it behind.  Keep the errno of the remount.
			 */
			const int saved_errno = errno;

			umount(dest);
			errno = saved_errno;
			return -1;
		}
	}

	return 0;
}
1005
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001006static int do_container_mount(struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001007 const struct container_config *config,
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001008 const struct container_mount *mnt)
1009{
Mike Frysinger05e594e2017-01-10 02:11:08 -05001010 char *dm_source = NULL;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001011 char *loop_source = NULL;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001012 char *source = NULL;
1013 char *dest = NULL;
1014 int rc = 0;
1015
1016 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0)
1017 return -errno;
1018
1019 /*
1020 * If it's a bind mount relative to rootfs, append source to
1021 * rootfs path, otherwise source path is absolute.
1022 */
1023 if ((mnt->flags & MS_BIND) && mnt->source[0] != '/') {
1024 if (asprintf(&source, "%s/%s", c->runfsroot, mnt->source) < 0)
1025 goto error_free_return;
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001026 } else if (mnt->loopback && mnt->source[0] != '/' && c->config_root) {
1027 if (asprintf(&source, "%s/%s", c->config_root, mnt->source) < 0)
1028 goto error_free_return;
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001029 } else {
1030 if (asprintf(&source, "%s", mnt->source) < 0)
1031 goto error_free_return;
1032 }
1033
1034 if (mnt->create) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001035 rc = setup_mount_destination(config, mnt, source, dest);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001036 if (rc)
1037 goto error_free_return;
1038 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001039 if (mnt->loopback) {
1040 /* Record this loopback file for cleanup later. */
1041 loop_source = source;
1042 source = NULL;
1043 rc = loopdev_setup(&source, loop_source);
1044 if (rc)
1045 goto error_free_return;
1046
Mike Frysinger05e594e2017-01-10 02:11:08 -05001047 /* Save this to cleanup when shutting down. */
Mike Frysinger412dbd22017-01-06 01:50:34 -05001048 rc = strdup_and_free(&c->loopdevs[c->num_loopdevs], source);
1049 if (rc)
1050 goto error_free_return;
1051 c->num_loopdevs++;
1052 }
Mike Frysinger05e594e2017-01-10 02:11:08 -05001053 if (mnt->verity) {
1054 /* Set this device up via dm-verity. */
1055 char *dm_name;
1056 dm_source = source;
1057 source = NULL;
1058 rc = dm_setup(&source, &dm_name, dm_source, mnt->verity);
1059 if (rc)
1060 goto error_free_return;
1061
1062 /* Save this to cleanup when shutting down. */
1063 rc = strdup_and_free(&c->device_mappers[c->num_device_mappers],
1064 dm_name);
1065 free(dm_name);
1066 if (rc)
1067 goto error_free_return;
1068 c->num_device_mappers++;
1069 }
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001070 if (mnt->mount_in_ns) {
1071 /* We can mount this with minijail. */
Dylan Reid36b9c012016-06-24 18:27:08 -07001072 rc = minijail_mount_with_data(c->jail, source, mnt->destination,
1073 mnt->type, mnt->flags, mnt->data);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001074 if (rc)
1075 goto error_free_return;
1076 } else {
1077 /* Mount this externally and unmount it on exit. */
Junichi Uekawa5d272772016-07-21 16:07:19 +09001078 if (mount_external(source, dest, mnt->type, mnt->flags,
1079 mnt->data))
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001080 goto error_free_return;
1081 /* Save this to unmount when shutting down. */
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001082 rc = strdup_and_free(&c->ext_mounts[c->num_ext_mounts], dest);
1083 if (rc)
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001084 goto error_free_return;
1085 c->num_ext_mounts++;
1086 }
1087
1088 goto exit;
1089
1090error_free_return:
1091 if (!rc)
1092 rc = -errno;
1093exit:
Mike Frysinger05e594e2017-01-10 02:11:08 -05001094 free(dm_source);
Mike Frysinger412dbd22017-01-06 01:50:34 -05001095 free(loop_source);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001096 free(source);
1097 free(dest);
1098 return rc;
1099}
1100
Dylan Reide040c6b2016-05-02 18:49:02 -07001101static int do_container_mounts(struct container *c,
1102 const struct container_config *config)
Dylan Reid7daf9982016-04-28 16:55:42 -07001103{
1104 unsigned int i;
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001105 int rc = 0;
Dylan Reid7daf9982016-04-28 16:55:42 -07001106
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001107 unmount_external_mounts(c);
Dylan Reide040c6b2016-05-02 18:49:02 -07001108 /*
1109 * Allocate space to track anything we mount in our mount namespace.
1110 * This over-allocates as it has space for all mounts.
1111 */
1112 c->ext_mounts = calloc(config->num_mounts, sizeof(*c->ext_mounts));
1113 if (!c->ext_mounts)
1114 return -errno;
Mike Frysinger412dbd22017-01-06 01:50:34 -05001115 c->loopdevs = calloc(config->num_mounts, sizeof(*c->loopdevs));
1116 if (!c->loopdevs)
1117 return -errno;
Mike Frysinger05e594e2017-01-10 02:11:08 -05001118 c->device_mappers = calloc(config->num_mounts, sizeof(*c->device_mappers));
1119 if (!c->device_mappers)
1120 return -errno;
Dylan Reide040c6b2016-05-02 18:49:02 -07001121
1122 for (i = 0; i < config->num_mounts; ++i) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001123 rc = do_container_mount(c, config, &config->mounts[i]);
Luis Hector Chavez3341ed62016-06-06 08:04:04 -07001124 if (rc)
1125 goto error_free_return;
Dylan Reid7daf9982016-04-28 16:55:42 -07001126 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001127
Dylan Reid7daf9982016-04-28 16:55:42 -07001128 return 0;
Dylan Reid2149be92016-04-28 18:38:57 -07001129
1130error_free_return:
Dylan Reide040c6b2016-05-02 18:49:02 -07001131 unmount_external_mounts(c);
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001132 return rc;
Dylan Reid7daf9982016-04-28 16:55:42 -07001133}
1134
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001135static int container_create_device(const struct container *c,
Stephen Barber1a398c72017-01-23 12:39:44 -08001136 const struct container_config *config,
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001137 const struct container_device *dev,
1138 int minor)
1139{
1140 char *path = NULL;
1141 int rc = 0;
1142 int mode;
Stephen Barber1a398c72017-01-23 12:39:44 -08001143 int uid_userns, gid_userns;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001144
1145 switch (dev->type) {
1146 case 'b':
1147 mode = S_IFBLK;
1148 break;
1149 case 'c':
1150 mode = S_IFCHR;
1151 break;
1152 default:
1153 return -EINVAL;
1154 }
1155 mode |= dev->fs_permissions;
1156
Stephen Barber1a398c72017-01-23 12:39:44 -08001157 uid_userns = get_userns_outside_id(config->uid_map, dev->uid);
1158 if (uid_userns < 0)
1159 return uid_userns;
1160 gid_userns = get_userns_outside_id(config->gid_map, dev->gid);
1161 if (gid_userns < 0)
1162 return gid_userns;
1163
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001164 if (asprintf(&path, "%s%s", c->runfsroot, dev->path) < 0)
1165 goto error_free_return;
1166 if (mknod(path, mode, makedev(dev->major, minor)) && errno != EEXIST)
1167 goto error_free_return;
Stephen Barber1a398c72017-01-23 12:39:44 -08001168 if (chown(path, uid_userns, gid_userns))
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001169 goto error_free_return;
1170 if (chmod(path, dev->fs_permissions))
1171 goto error_free_return;
1172
1173 goto exit;
1174
1175error_free_return:
1176 rc = -errno;
1177exit:
1178 free(path);
1179 return rc;
1180}
1181
Stephen Barber1a398c72017-01-23 12:39:44 -08001182
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001183static int mount_runfs(struct container *c, const struct container_config *config)
Dylan Reid837c74a2016-01-22 17:25:21 -08001184{
Dylan Reidb3621832016-03-24 10:24:57 -07001185 static const mode_t root_dir_mode = 0660;
Dylan Reide040c6b2016-05-02 18:49:02 -07001186 const char *rootfs = config->rootfs;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001187 char *runfs_template = NULL;
Stephen Barber1a398c72017-01-23 12:39:44 -08001188 int uid_userns, gid_userns;
Dylan Reid837c74a2016-01-22 17:25:21 -08001189
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001190 if (asprintf(&runfs_template, "%s/%s_XXXXXX", c->rundir, c->name) < 0)
1191 return -ENOMEM;
1192
1193 c->runfs = mkdtemp(runfs_template);
1194 if (!c->runfs) {
1195 free(runfs_template);
1196 return -errno;
1197 }
1198
Stephen Barber1a398c72017-01-23 12:39:44 -08001199 uid_userns = get_userns_outside_id(config->uid_map, config->uid);
1200 if (uid_userns < 0)
1201 return uid_userns;
1202 gid_userns = get_userns_outside_id(config->gid_map, config->gid);
1203 if (gid_userns < 0)
1204 return gid_userns;
1205
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001206 /* Make sure the container uid can access the rootfs. */
1207 if (chmod(c->runfs, 0700))
1208 return -errno;
Stephen Barber1a398c72017-01-23 12:39:44 -08001209 if (chown(c->runfs, uid_userns, gid_userns))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001210 return -errno;
1211
1212 if (asprintf(&c->runfsroot, "%s/root", c->runfs) < 0)
1213 return -errno;
1214
1215 if (mkdir(c->runfsroot, root_dir_mode))
1216 return -errno;
1217 if (chmod(c->runfsroot, root_dir_mode))
1218 return -errno;
1219
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001220 if (mount(rootfs, c->runfsroot, "", MS_BIND, NULL))
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001221 return -errno;
1222
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -07001223 /* MS_BIND ignores any flags passed to it (except MS_REC). We need a
1224 * second call to mount() to actually set them.
1225 */
1226 if (config->rootfs_mount_flags &&
1227 mount(rootfs, c->runfsroot, "",
1228 config->rootfs_mount_flags, NULL)) {
1229 return -errno;
1230 }
1231
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001232 return 0;
1233}
1234
1235int container_start(struct container *c, const struct container_config *config)
1236{
1237 int rc = 0;
1238 unsigned int i;
Stephen Barber1a398c72017-01-23 12:39:44 -08001239 int cgroup_uid, cgroup_gid;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001240 char **destinations;
1241 size_t num_destinations;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001242
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001243 if (!c)
1244 return -EINVAL;
Dylan Reide040c6b2016-05-02 18:49:02 -07001245 if (!config)
1246 return -EINVAL;
1247 if (!config->program_argv || !config->program_argv[0])
1248 return -EINVAL;
1249
Mike Frysingerb22acdf2017-01-08 02:02:35 -05001250 if (config->config_root) {
1251 c->config_root = strdup(config->config_root);
1252 if (!c->config_root) {
1253 rc = -ENOMEM;
1254 goto error_rmdir;
1255 }
1256 }
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001257 if (config->premounted_runfs) {
1258 c->runfs = NULL;
1259 c->runfsroot = strdup(config->premounted_runfs);
1260 if (!c->runfsroot) {
1261 rc = -ENOMEM;
1262 goto error_rmdir;
1263 }
1264 } else {
1265 rc = mount_runfs(c, config);
1266 if (rc)
1267 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001268 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001269
1270 c->jail = minijail_new();
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001271 if (!c->jail)
Luis Hector Chavez945af482016-06-03 08:39:34 -07001272 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001273
Luis Hector Chavez8e7b6d52016-06-02 20:40:43 -07001274 rc = do_container_mounts(c, config);
1275 if (rc)
Dylan Reid7daf9982016-04-28 16:55:42 -07001276 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001277
Stephen Barber1a398c72017-01-23 12:39:44 -08001278 cgroup_uid = get_userns_outside_id(config->uid_map,
1279 config->cgroup_owner);
1280 if (cgroup_uid < 0) {
1281 rc = cgroup_uid;
1282 goto error_rmdir;
1283 }
1284 cgroup_gid = get_userns_outside_id(config->gid_map,
1285 config->cgroup_group);
1286 if (cgroup_gid < 0) {
1287 rc = cgroup_gid;
1288 goto error_rmdir;
1289 }
1290
Dylan Reida9966422016-07-21 10:11:34 -07001291 c->cgroup = container_cgroup_new(c->name,
1292 "/sys/fs/cgroup",
1293 config->cgroup_parent,
Stephen Barber1a398c72017-01-23 12:39:44 -08001294 cgroup_uid,
1295 cgroup_gid);
Dylan Reida9966422016-07-21 10:11:34 -07001296 if (!c->cgroup)
1297 goto error_rmdir;
1298
Keshav Santhanam268fa032016-07-14 09:59:24 -07001299 /* Must be root to modify device cgroup or mknod */
1300 if (getuid() == 0) {
1301 c->cgroup->ops->deny_all_devices(c->cgroup);
Dylan Reid837c74a2016-01-22 17:25:21 -08001302
Keshav Santhanam268fa032016-07-14 09:59:24 -07001303 for (i = 0; i < config->num_devices; i++) {
1304 const struct container_device *dev = &config->devices[i];
1305 int minor = dev->minor;
Dylan Reid837c74a2016-01-22 17:25:21 -08001306
Keshav Santhanam268fa032016-07-14 09:59:24 -07001307 if (dev->copy_minor) {
1308 struct stat st_buff;
1309 if (stat(dev->path, &st_buff) < 0)
1310 continue;
1311 /* Use the minor macro to extract the device number. */
1312 minor = minor(st_buff.st_rdev);
1313 }
1314 if (minor >= 0) {
Stephen Barber1a398c72017-01-23 12:39:44 -08001315 rc = container_create_device(c, config, dev, minor);
Keshav Santhanam268fa032016-07-14 09:59:24 -07001316 if (rc)
1317 goto error_rmdir;
1318 }
1319
1320 rc = c->cgroup->ops->add_device(c->cgroup, dev->major,
1321 minor, dev->read_allowed,
1322 dev->write_allowed,
1323 dev->modify_allowed, dev->type);
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001324 if (rc)
Dylan Reid355d5e42016-04-29 16:53:31 -07001325 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001326 }
Mike Frysinger412dbd22017-01-06 01:50:34 -05001327
1328 for (i = 0; i < c->num_loopdevs; ++i) {
1329 struct stat st;
1330
1331 if (stat(c->loopdevs[i], &st) < 0)
1332 goto error_rmdir;
1333 rc = c->cgroup->ops->add_device(c->cgroup, major(st.st_rdev),
1334 minor(st.st_rdev), 1, 0, 0, 'b');
1335 if (rc)
1336 goto error_rmdir;
1337 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001338 }
1339
Dylan Reidd7229582016-04-27 17:08:40 -07001340 /* Potentailly run setfiles on mounts configured outside of the jail */
Yusuke Sato91f11f02016-12-02 16:15:13 -08001341 destinations = calloc(config->num_mounts, sizeof(char *));
1342 num_destinations = 0;
Dylan Reide040c6b2016-05-02 18:49:02 -07001343 for (i = 0; i < config->num_mounts; i++) {
1344 const struct container_mount *mnt = &config->mounts[i];
Yusuke Sato91f11f02016-12-02 16:15:13 -08001345 char* dest = mnt->destination;
Dylan Reidd7229582016-04-27 17:08:40 -07001346
1347 if (mnt->mount_in_ns)
1348 continue;
Junichi Uekawa5d272772016-07-21 16:07:19 +09001349 if (mnt->flags & MS_RDONLY)
1350 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001351
Yusuke Satod33db432016-12-05 16:24:37 -08001352 /* A hack to avoid setfiles on /data and /cache. */
1353 if (!strcmp(dest, "/data") || !strcmp(dest, "/cache"))
Yusuke Sato91f11f02016-12-02 16:15:13 -08001354 continue;
1355
1356 if (asprintf(&dest, "%s%s", c->runfsroot, mnt->destination) < 0) {
1357 size_t j;
1358 for (j = 0; j < num_destinations; ++j) {
1359 free(destinations[j]);
1360 }
1361 free(destinations);
Dylan Reidd7229582016-04-27 17:08:40 -07001362 goto error_rmdir;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001363 }
1364
1365 destinations[num_destinations++] = dest;
Dylan Reidd7229582016-04-27 17:08:40 -07001366 }
Yusuke Sato91f11f02016-12-02 16:15:13 -08001367 if (num_destinations) {
1368 size_t i;
1369 rc = run_setfiles_command(c, config, destinations, num_destinations);
1370 for (i = 0; i < num_destinations; ++i) {
1371 free(destinations[i]);
1372 }
1373 }
1374 free(destinations);
1375 if (rc)
1376 goto error_rmdir;
Dylan Reidd7229582016-04-27 17:08:40 -07001377
Chinyue Chenfac909e2016-06-24 14:17:42 +08001378 /* Setup CPU cgroup params. */
1379 if (config->cpu_cgparams.shares) {
1380 rc = c->cgroup->ops->set_cpu_shares(
1381 c->cgroup, config->cpu_cgparams.shares);
1382 if (rc)
1383 goto error_rmdir;
1384 }
1385 if (config->cpu_cgparams.period) {
1386 rc = c->cgroup->ops->set_cpu_quota(
1387 c->cgroup, config->cpu_cgparams.quota);
1388 if (rc)
1389 goto error_rmdir;
1390 rc = c->cgroup->ops->set_cpu_period(
1391 c->cgroup, config->cpu_cgparams.period);
1392 if (rc)
1393 goto error_rmdir;
1394 }
1395 if (config->cpu_cgparams.rt_period) {
1396 rc = c->cgroup->ops->set_cpu_rt_runtime(
1397 c->cgroup, config->cpu_cgparams.rt_runtime);
1398 if (rc)
1399 goto error_rmdir;
1400 rc = c->cgroup->ops->set_cpu_rt_period(
1401 c->cgroup, config->cpu_cgparams.rt_period);
1402 if (rc)
1403 goto error_rmdir;
1404 }
1405
Dylan Reid837c74a2016-01-22 17:25:21 -08001406 /* Setup and start the container with libminijail. */
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001407 if (config->pid_file_path) {
1408 c->pid_file_path = strdup(config->pid_file_path);
1409 if (!c->pid_file_path) {
1410 rc = -ENOMEM;
1411 goto error_rmdir;
1412 }
1413 } else if (c->runfs) {
1414 if (asprintf(&c->pid_file_path, "%s/container.pid", c->runfs) < 0) {
1415 rc = -ENOMEM;
1416 goto error_rmdir;
1417 }
1418 }
1419
1420 if (c->pid_file_path)
1421 minijail_write_pid_file(c->jail, c->pid_file_path);
Dylan Reid837c74a2016-01-22 17:25:21 -08001422 minijail_reset_signal_mask(c->jail);
1423
1424 /* Setup container namespaces. */
1425 minijail_namespace_ipc(c->jail);
1426 minijail_namespace_vfs(c->jail);
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001427 if (!config->share_host_netns)
1428 minijail_namespace_net(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001429 minijail_namespace_pids(c->jail);
Dylan Reid837c74a2016-01-22 17:25:21 -08001430 minijail_namespace_user(c->jail);
Mike Frysingerfbd60552017-01-03 17:28:48 -05001431 if (getuid() != 0)
1432 minijail_namespace_user_disable_setgroups(c->jail);
Dylan Reidc6ca1042016-07-11 15:03:27 -07001433 minijail_namespace_cgroups(c->jail);
Dylan Reide040c6b2016-05-02 18:49:02 -07001434 rc = minijail_uidmap(c->jail, config->uid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001435 if (rc)
1436 goto error_rmdir;
Dylan Reide040c6b2016-05-02 18:49:02 -07001437 rc = minijail_gidmap(c->jail, config->gid_map);
Dylan Reid837c74a2016-01-22 17:25:21 -08001438 if (rc)
1439 goto error_rmdir;
Dylan Reid837c74a2016-01-22 17:25:21 -08001440
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001441 /* Set the UID/GID inside the container if not 0. */
Stephen Barber1a398c72017-01-23 12:39:44 -08001442 if (get_userns_outside_id(config->uid_map, config->uid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001443 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001444 else if (config->uid > 0)
1445 minijail_change_uid(c->jail, config->uid);
1446 if (get_userns_outside_id(config->gid_map, config->gid) < 0)
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001447 goto error_rmdir;
Stephen Barber1a398c72017-01-23 12:39:44 -08001448 else if (config->gid > 0)
1449 minijail_change_gid(c->jail, config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001450
Dylan Reid837c74a2016-01-22 17:25:21 -08001451 rc = minijail_enter_pivot_root(c->jail, c->runfsroot);
1452 if (rc)
1453 goto error_rmdir;
1454
1455 /* Add the cgroups configured above. */
Dmitry Torokhov0d253a62017-01-05 09:41:33 -08001456 for (i = 0; i < NUM_CGROUP_TYPES; i++) {
1457 if (c->cgroup->cgroup_tasks_paths[i]) {
1458 rc = minijail_add_to_cgroup(c->jail,
1459 c->cgroup->cgroup_tasks_paths[i]);
1460 if (rc)
1461 goto error_rmdir;
1462 }
1463 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001464
Dylan Reide040c6b2016-05-02 18:49:02 -07001465 if (config->alt_syscall_table)
1466 minijail_use_alt_syscall(c->jail, config->alt_syscall_table);
Dylan Reid837c74a2016-01-22 17:25:21 -08001467
1468 minijail_run_as_init(c->jail);
1469
Dylan Reid3da683b2016-04-05 03:35:35 -07001470 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
1471 minijail_skip_remount_private(c->jail);
1472
Dylan Reidc4335842016-11-11 10:24:52 -08001473 if (!config->keep_fds_open)
1474 minijail_close_open_fds(c->jail);
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001475
Dylan Reid837c74a2016-01-22 17:25:21 -08001476 rc = minijail_run_pid_pipes_no_preload(c->jail,
Dylan Reide040c6b2016-05-02 18:49:02 -07001477 config->program_argv[0],
1478 config->program_argv,
Dylan Reid837c74a2016-01-22 17:25:21 -08001479 &c->init_pid, NULL, NULL,
1480 NULL);
1481 if (rc)
1482 goto error_rmdir;
1483 return 0;
1484
1485error_rmdir:
Luis Hector Chavez945af482016-06-03 08:39:34 -07001486 if (!rc)
1487 rc = -errno;
1488 container_teardown(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001489 return rc;
1490}
1491
1492const char *container_root(struct container *c)
1493{
1494 return c->runfs;
1495}
1496
1497int container_pid(struct container *c)
1498{
1499 return c->init_pid;
1500}
1501
1502static int container_teardown(struct container *c)
1503{
Dylan Reid837c74a2016-01-22 17:25:21 -08001504 int ret = 0;
1505
Dylan Reide040c6b2016-05-02 18:49:02 -07001506 unmount_external_mounts(c);
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001507 if (c->runfsroot && c->runfs) {
Luis Hector Chavez945af482016-06-03 08:39:34 -07001508 if (umount(c->runfsroot))
1509 ret = -errno;
1510 if (rmdir(c->runfsroot))
1511 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001512 FREE_AND_NULL(c->runfsroot);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001513 }
1514 if (c->pid_file_path) {
1515 if (unlink(c->pid_file_path))
1516 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001517 FREE_AND_NULL(c->pid_file_path);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001518 }
1519 if (c->runfs) {
1520 if (rmdir(c->runfs))
1521 ret = -errno;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001522 FREE_AND_NULL(c->runfs);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001523 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001524 return ret;
1525}
1526
1527int container_wait(struct container *c)
1528{
Dylan Reidcf745c52016-04-22 10:18:03 -07001529 int rc;
1530
1531 do {
1532 rc = minijail_wait(c->jail);
Luis Hector Chavez4641e852016-06-02 15:40:19 -07001533 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001534
Luis Hector Chavez945af482016-06-03 08:39:34 -07001535 // If the process had already been reaped, still perform teardown.
1536 if (rc == -ECHILD || rc >= 0) {
Dylan Reidcf745c52016-04-22 10:18:03 -07001537 rc = container_teardown(c);
Luis Hector Chavez945af482016-06-03 08:39:34 -07001538 }
Dylan Reidcf745c52016-04-22 10:18:03 -07001539 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001540}
1541
1542int container_kill(struct container *c)
1543{
Luis Hector Chavez945af482016-06-03 08:39:34 -07001544 if (kill(c->init_pid, SIGKILL) && errno != ESRCH)
Dylan Reid837c74a2016-01-22 17:25:21 -08001545 return -errno;
1546 return container_wait(c);
1547}