blob: b05d19a0d0cd2c5ad02bab2326ab8513772f3706 [file] [log] [blame]
Luis Hector Chavez81efb332017-09-18 14:01:29 -07001// Copyright 2016 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
Dylan Reid837c74a2016-01-22 17:25:21 -08004
Dylan Reid837c74a2016-01-22 17:25:21 -08005#include <errno.h>
6#include <fcntl.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08007#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07008#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08009#include <stdlib.h>
10#include <string.h>
11#include <sys/mount.h>
12#include <sys/stat.h>
13#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070014#include <sys/wait.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070015#include <syscall.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <unistd.h>
17
Luis Hector Chavez644d2042017-09-19 18:56:44 -070018#include <map>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070019#include <memory>
yusukesbbc37a72017-11-21 09:51:54 -080020#include <ostream>
Stephen Barber771653f2017-10-04 23:48:57 -070021#include <set>
yusukesbbc37a72017-11-21 09:51:54 -080022#include <sstream>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070023#include <string>
Luis Hector Chavez644d2042017-09-19 18:56:44 -070024#include <utility>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070025#include <vector>
26
27#include <base/bind.h>
28#include <base/bind_helpers.h>
29#include <base/callback_helpers.h>
30#include <base/files/file_path.h>
31#include <base/files/file_util.h>
32#include <base/files/scoped_file.h>
Luis Hector Chavez835d39e2017-09-19 15:16:31 -070033#include <base/logging.h>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070034#include <base/macros.h>
35#include <base/strings/string_util.h>
36#include <base/strings/stringprintf.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070037#include <libminijail.h>
Luis Hector Chavez626f5c82017-09-18 11:19:32 -070038#include <scoped_minijail.h>
Mike Frysinger412dbd22017-01-06 01:50:34 -050039
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -070040#include "libcontainer/cgroup.h"
Luis Hector Chavez644d2042017-09-19 18:56:44 -070041#include "libcontainer/config.h"
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070042#include "libcontainer/libcontainer.h"
Luis Hector Chavez81efb332017-09-18 14:01:29 -070043#include "libcontainer/libcontainer_util.h"
Yusuke Sato91f11f02016-12-02 16:15:13 -080044
// Produces a std::string holding |s| wrapped in double quotes. Used by the
// debug stream operators in this file.
#define QUOTE(s) (std::string("\"") + std::string(s) + std::string("\""))
46
Luis Hector Chavez5381d002017-09-16 12:54:24 -070047namespace {
48
Luis Hector Chavez81efb332017-09-18 14:01:29 -070049using libcontainer::DeviceMapperDetach;
50using libcontainer::DeviceMapperSetup;
51using libcontainer::GetUsernsOutsideId;
52using libcontainer::LoopdevDetach;
53using libcontainer::LoopdevSetup;
54using libcontainer::MakeDir;
55using libcontainer::MountExternal;
56using libcontainer::TouchFile;
Mike Frysinger412dbd22017-01-06 01:50:34 -050057
// Capacity of the fixed-size argv array used when invoking setfiles; the
// terminating nullptr entry counts against this limit.
constexpr size_t kMaxNumSetfilesArgs = 128;

// Capacity of the rlimits array in container_config. Linux defines 15 at the
// time of writing.
constexpr size_t kMaxRlimits = 32;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -070060
Luis Hector Chavez5381d002017-09-16 12:54:24 -070061struct Mount {
62 std::string name;
63 base::FilePath source;
64 base::FilePath destination;
65 std::string type;
66 std::string data;
67 std::string verity;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070068 int flags;
69 int uid;
70 int gid;
71 int mode;
Luis Hector Chavez5381d002017-09-16 12:54:24 -070072
73 // True if mount should happen in new vfs ns.
74 bool mount_in_ns;
75
76 // True if target should be created if it doesn't exist.
77 bool create;
78
79 // True if target should be mounted via loopback.
80 bool loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -080081};
82
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070083struct Device {
84 // 'c' or 'b' for char or block
85 char type;
86 base::FilePath path;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070087 int fs_permissions;
88 int major;
89 int minor;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070090
91 // Copy the minor from existing node, ignores |minor|.
92 bool copy_minor;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070093 int uid;
94 int gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -070095};
96
// One rule for the devices cgroup.
struct CgroupDevice {
  // True to allow access, false to deny it.
  bool allow;
  char type;

  // A value of -1 in either |major| or |minor| matches every number.
  int major;
  int minor;

  // Access kinds covered by this rule.
  bool read;
  bool write;
  bool modify;
};
109
// Parameters for the cpu cgroup of the container.
struct CpuCgroup {
  int shares;
  int quota;
  int period;
  int rt_runtime;
  int rt_period;
};
117
// A single resource limit: which limit it is plus its current and maximum
// values.
struct Rlimit {
  int type;
  uint32_t cur;
  uint32_t max;
};
123
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700124} // namespace
125
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700126// Structure that configures how the container is run.
Dylan Reid837c74a2016-01-22 17:25:21 -0800127struct container_config {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700128 // Path to the root of the container itself.
129 base::FilePath config_root;
130
131 // Path to the root of the container's filesystem.
132 base::FilePath rootfs;
133
134 // Flags that will be passed to mount() for the rootfs.
yusukesb7b9a042017-12-08 13:14:25 -0800135 unsigned long rootfs_mount_flags = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700136
137 // Path to where the container will be run.
138 base::FilePath premounted_runfs;
139
140 // Path to the file where the pid should be written.
141 base::FilePath pid_file_path;
142
143 // The program to run and args, e.g. "/sbin/init".
144 std::vector<std::string> program_argv;
145
146 // The uid the container will run as.
yusukesb7b9a042017-12-08 13:14:25 -0800147 uid_t uid = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700148
149 // Mapping of UIDs in the container, e.g. "0 100000 1024"
150 std::string uid_map;
151
152 // The gid the container will run as.
yusukesb7b9a042017-12-08 13:14:25 -0800153 gid_t gid = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700154
155 // Mapping of GIDs in the container, e.g. "0 100000 1024"
156 std::string gid_map;
157
158 // Syscall table to use or nullptr if none.
159 std::string alt_syscall_table;
160
161 // Filesystems to mount in the new namespace.
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700162 std::vector<Mount> mounts;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700163
Stephen Barber771653f2017-10-04 23:48:57 -0700164 // Namespaces that should be used for the container.
165 std::set<std::string> namespaces;
166
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700167 // Device nodes to create.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700168 std::vector<Device> devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700169
170 // Device node cgroup permissions.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700171 std::vector<CgroupDevice> cgroup_devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700172
173 // Should run setfiles on mounts to enable selinux.
174 std::string run_setfiles;
175
176 // CPU cgroup params.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700177 CpuCgroup cpu_cgparams;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700178
179 // Parent dir for cgroup creation
180 base::FilePath cgroup_parent;
181
182 // uid to own the created cgroups
yusukesb7b9a042017-12-08 13:14:25 -0800183 uid_t cgroup_owner = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700184
185 // gid to own the created cgroups
yusukesb7b9a042017-12-08 13:14:25 -0800186 gid_t cgroup_group = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700187
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700188 // Allow the child process to keep open FDs (for stdin/out/err).
yusukesf125f332017-12-08 13:45:15 -0800189 bool keep_fds_open = false;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700190
191 // Array of rlimits for the contained process.
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700192 Rlimit rlimits[kMaxRlimits];
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700193
194 // The number of elements in `rlimits`.
yusukesb7b9a042017-12-08 13:14:25 -0800195 int num_rlimits = 0;
yusukesf125f332017-12-08 13:45:15 -0800196 bool use_capmask = false;
197 bool use_capmask_ambient = false;
yusukesb7b9a042017-12-08 13:14:25 -0800198 uint64_t capmask = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700199
200 // The mask of securebits to skip when restricting caps.
yusukesb7b9a042017-12-08 13:14:25 -0800201 uint64_t securebits_skip_mask = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700202
203 // Whether the container needs an extra process to be run as init.
yusukesf125f332017-12-08 13:45:15 -0800204 bool do_init = false;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700205
206 // The SELinux context name the container will run under.
207 std::string selinux_context;
208
209 // A function pointer to be called prior to calling execve(2).
yusukesb7b9a042017-12-08 13:14:25 -0800210 minijail_hook_t pre_start_hook = nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700211
212 // Parameter that will be passed to pre_start_hook().
yusukesb7b9a042017-12-08 13:14:25 -0800213 void* pre_start_hook_payload = nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700214
Luis Hector Chaveze03926a2017-09-28 17:28:49 -0700215 // A list of file descriptors to inherit.
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700216 std::vector<int> inherited_fds;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700217
218 // A list of hooks that will be called upon minijail reaching various states
219 // of execution.
220 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
221 hooks;
Dylan Reid837c74a2016-01-22 17:25:21 -0800222};
223
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700224// Container manipulation
225struct container {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700226 std::unique_ptr<libcontainer::Cgroup> cgroup;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700227 ScopedMinijail jail;
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700228 pid_t init_pid = -1;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700229 base::FilePath config_root;
230 base::FilePath runfs;
231 base::FilePath rundir;
232 base::FilePath runfsroot;
233 base::FilePath pid_file_path;
234
235 // Mounts made outside of the minijail.
236 std::vector<base::FilePath> ext_mounts;
237 std::vector<base::FilePath> loopdev_paths;
238 std::vector<std::string> device_mappers;
239 std::string name;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700240
241 std::vector<std::pair<libcontainer::HookState,
242 std::vector<libcontainer::HookCallback>>>
243 hook_states;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700244};
245
246namespace {
247
yusukesbbc37a72017-11-21 09:51:54 -0800248std::ostream& operator<<(std::ostream& stream, const Mount& mount) {
249 stream << "mount:" << std::endl
250 << " name: " << QUOTE(mount.name) << std::endl
251 << " source: " << QUOTE(mount.source.value()) << std::endl
252 << " destination: " << QUOTE(mount.destination.value()) << std::endl
253 << " type: " << QUOTE(mount.type) << std::endl
254 << " data: " << QUOTE(mount.data) << std::endl
255 << " verity: " << QUOTE(mount.verity) << std::endl
256 << " flags: 0x" << std::hex << mount.flags << std::dec << std::endl
257 << " uid: " << mount.uid << std::endl
258 << " gid: " << mount.gid << std::endl
259 << " mode: 0" << std::oct << mount.mode << std::dec << std::endl
260 << " mount_in_ns: " << mount.mount_in_ns << std::endl
261 << " create: " << mount.create << std::endl
262 << " loopback: " << mount.loopback << std::endl;
263
264 return stream;
265}
266
267std::ostream& operator<<(std::ostream& stream, const Device& device) {
268 stream << "device:" << std::endl
269 << " type: " << device.type << std::endl
270 << " path: " << QUOTE(device.path.value()) << std::endl
271 << " fs_permissions: 0" << std::oct << device.fs_permissions
272 << std::dec << std::endl
273 << " major: " << device.major << std::endl
274 << " minor: " << device.minor << std::endl
275 << " copy_minor: " << device.copy_minor << std::endl
276 << " uid: " << device.uid << std::endl
277 << " gid: " << device.gid << std::endl;
278
279 return stream;
280}
281
282std::ostream& operator<<(std::ostream& stream,
283 const CgroupDevice& cgroup_device) {
284 stream << "cgroup_device:" << std::endl
285 << " allow: " << cgroup_device.allow << std::endl
286 << " type: " << cgroup_device.type << std::endl
287 << " major: " << cgroup_device.major << std::endl
288 << " minor: " << cgroup_device.minor << std::endl
289 << " read: " << cgroup_device.read << std::endl
290 << " write: " << cgroup_device.write << std::endl
291 << " modify: " << cgroup_device.modify << std::endl;
292
293 return stream;
294}
295
296std::ostream& operator<<(std::ostream& stream, const CpuCgroup& cpu_cgroup) {
297 stream << "cpu_cgroup:" << std::endl
298 << " shares: " << cpu_cgroup.shares << std::endl
299 << " quota: " << cpu_cgroup.quota << std::endl
300 << " period: " << cpu_cgroup.period << std::endl
301 << " rt_runtime: " << cpu_cgroup.rt_runtime << std::endl
302 << " rt_period: " << cpu_cgroup.rt_period << std::endl;
303
304 return stream;
305}
306
307std::ostream& operator<<(std::ostream& stream, const Rlimit& rlimit) {
308 stream << "rlimit:" << std::endl
309 << " type: " << rlimit.type << std::endl
310 << " cur: " << rlimit.cur << std::endl
311 << " max: " << rlimit.max << std::endl;
312
313 return stream;
314}
315
316std::ostream& operator<<(std::ostream& stream, const container_config* c) {
317 stream << "config_root: " << QUOTE(c->config_root.value()) << std::endl
318 << "rootfs: " << QUOTE(c->rootfs.value()) << std::endl
319 << "rootfs_mount_flags: 0x" << std::hex << c->rootfs_mount_flags
320 << std::dec << std::endl
321 << "premounted_runfs: " << QUOTE(c->premounted_runfs.value())
322 << std::endl
323 << "pid_file_path: " << QUOTE(c->pid_file_path.value()) << std::endl
324 << "program_argv: size=" << c->program_argv.size() << std::endl;
325
326 for (const std::string& argv : c->program_argv)
327 stream << " " << QUOTE(argv) << std::endl;
328
329 stream << "uid: " << c->uid << std::endl
330 << "uid_map: " << QUOTE(c->uid_map) << std::endl
331 << "gid: " << c->gid << std::endl
332 << "gid_map: " << QUOTE(c->gid_map) << std::endl
333 << "alt_syscall_table: " << QUOTE(c->alt_syscall_table) << std::endl;
334
335 for (const auto& mount : c->mounts)
336 stream << mount;
337
338 stream << "namespaces: size=" << c->namespaces.size() << std::endl;
339 for (const std::string& ns : c->namespaces)
340 stream << " " << QUOTE(ns) << std::endl;
341
342 for (const auto& device : c->devices)
343 stream << device;
344
345 for (const auto& cgroup_device : c->cgroup_devices)
346 stream << cgroup_device;
347
348 stream << "run_setfiles: " << QUOTE(c->run_setfiles) << std::endl
349 << c->cpu_cgparams
350 << "cgroup_parent: " << QUOTE(c->cgroup_parent.value()) << std::endl
351 << "cgroup_owner: " << c->cgroup_owner << std::endl
352 << "cgroup_group: " << c->cgroup_group << std::endl
353 << "keep_fds_open: " << c->keep_fds_open << std::endl;
354
355 stream << "num_rlimits: " << c->num_rlimits << std::endl;
356 for (size_t i = 0; i < c->num_rlimits; ++i)
357 stream << c->rlimits[i];
358
yusukesf125f332017-12-08 13:45:15 -0800359 stream << "use_capmask: " << c->use_capmask << std::endl
yusukesbbc37a72017-11-21 09:51:54 -0800360 << "use_capmask_ambient: " << c->use_capmask_ambient << std::endl
361 << "capmask: 0x" << std::hex << c->capmask << std::dec << std::endl
362 << "securebits_skip_mask: 0x" << std::hex << c->securebits_skip_mask
363 << std::dec << std::endl
364 << "do_init: " << c->do_init << std::endl
365 << "selinux_context: " << QUOTE(c->selinux_context) << std::endl
366 << "pre_start_hook: " << reinterpret_cast<void*>(c->pre_start_hook)
367 << std::endl
368 << "pre_start_hook_payload: " << c->pre_start_hook_payload << std::endl
369 << "inherited_fds: size=" << c->inherited_fds.size() << std::endl;
370
371 for (int fd : c->inherited_fds)
372 stream << " " << fd << std::endl;
373
374 stream << "hooks: size=" << c->hooks.size() << std::endl;
375
376 return stream;
377}
378
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700379// Returns the path for |path_in_container| in the outer namespace.
380base::FilePath GetPathInOuterNamespace(
381 const base::FilePath& root, const base::FilePath& path_in_container) {
382 if (path_in_container.IsAbsolute())
383 return base::FilePath(root.value() + path_in_container.value());
384 return root.Append(path_in_container);
385}
386
387// Make sure the mount target exists in the new rootfs. Create if needed and
388// possible.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700389bool SetupMountDestination(const struct container_config* config,
390 const Mount& mount,
391 const base::FilePath& source,
392 const base::FilePath& dest) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700393 struct stat st_buf;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700394 if (stat(dest.value().c_str(), &st_buf) == 0) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700395 // destination exists.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700396 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700397 }
398
399 // Try to create the destination. Either make directory or touch a file
400 // depending on the source type.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700401 int uid_userns;
402 if (!GetUsernsOutsideId(config->uid_map, mount.uid, &uid_userns))
403 return false;
404 int gid_userns;
405 if (!GetUsernsOutsideId(config->gid_map, mount.gid, &gid_userns))
406 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700407
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700408 if (stat(source.value().c_str(), &st_buf) != 0 || S_ISDIR(st_buf.st_mode) ||
409 S_ISBLK(st_buf.st_mode)) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700410 return MakeDir(dest, uid_userns, gid_userns, mount.mode);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700411 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700412
413 return TouchFile(dest, uid_userns, gid_userns, mount.mode);
414}
415
416// Fork and exec the setfiles command to configure the selinux policy.
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700417bool RunSetfilesCommand(const struct container* c,
418 const struct container_config* config,
419 const std::vector<base::FilePath>& destinations,
420 pid_t container_pid) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700421 int pid = fork();
422 if (pid == 0) {
423 size_t arg_index = 0;
424 const char* argv[kMaxNumSetfilesArgs];
425 const char* env[] = {
426 nullptr,
427 };
428
429 base::FilePath context_path = c->runfsroot.Append("file_contexts");
430
431 argv[arg_index++] = config->run_setfiles.c_str();
432 argv[arg_index++] = "-r";
433 argv[arg_index++] = c->runfsroot.value().c_str();
434 argv[arg_index++] = context_path.value().c_str();
435 if (arg_index + destinations.size() >= kMaxNumSetfilesArgs)
436 _exit(-E2BIG);
437 for (const auto& destination : destinations)
438 argv[arg_index++] = destination.value().c_str();
439 argv[arg_index] = nullptr;
440
441 execve(
442 argv[0], const_cast<char* const*>(argv), const_cast<char* const*>(env));
443
444 /* Command failed to exec if execve returns. */
445 _exit(-errno);
446 }
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700447 if (pid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700448 PLOG(ERROR) << "Failed to fork to run setfiles";
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700449 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700450 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700451
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700452 int status;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700453 if (HANDLE_EINTR(waitpid(pid, &status, 0)) < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700454 PLOG(ERROR) << "Failed to wait for setfiles";
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700455 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700456 }
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700457 if (!WIFEXITED(status)) {
458 LOG(ERROR) << "setfiles did not terminate cleanly";
459 return false;
460 }
461 if (WEXITSTATUS(status) != 0) {
462 LOG(ERROR) << "setfiles exited with non-zero status: "
463 << WEXITSTATUS(status);
464 return false;
465 }
466 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700467}
468
469// Unmounts anything we mounted in this mount namespace in the opposite order
470// that they were mounted.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700471bool UnmountExternalMounts(struct container* c) {
472 bool ret = true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700473
474 for (auto it = c->ext_mounts.rbegin(); it != c->ext_mounts.rend(); ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700475 if (umount(it->value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700476 PLOG(ERROR) << "Failed to unmount " << it->value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700477 ret = false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700478 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700479 }
480 c->ext_mounts.clear();
481
482 for (auto it = c->loopdev_paths.rbegin(); it != c->loopdev_paths.rend();
483 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700484 if (!LoopdevDetach(*it))
485 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700486 }
487 c->loopdev_paths.clear();
488
489 for (auto it = c->device_mappers.rbegin(); it != c->device_mappers.rend();
490 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700491 if (!DeviceMapperDetach(*it))
492 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700493 }
494 c->device_mappers.clear();
495
496 return ret;
497}
498
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700499bool DoContainerMount(struct container* c,
500 const struct container_config* config,
501 const Mount& mount) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700502 base::FilePath dest =
503 GetPathInOuterNamespace(c->runfsroot, mount.destination);
504
505 // If it's a bind mount relative to rootfs, append source to
506 // rootfs path, otherwise source path is absolute.
507 base::FilePath source;
508 if ((mount.flags & MS_BIND) && !mount.source.IsAbsolute()) {
509 source = GetPathInOuterNamespace(c->runfsroot, mount.source);
510 } else if (mount.loopback && !mount.source.IsAbsolute() &&
511 !c->config_root.empty()) {
512 source = GetPathInOuterNamespace(c->config_root, mount.source);
513 } else {
514 source = mount.source;
515 }
516
517 // Only create the destinations for external mounts, minijail will take
518 // care of those mounted in the new namespace.
519 if (mount.create && !mount.mount_in_ns) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700520 if (!SetupMountDestination(config, mount, source, dest))
521 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700522 }
523 if (mount.loopback) {
524 // Record this loopback file for cleanup later.
525 base::FilePath loop_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700526 if (!LoopdevSetup(loop_source, &source))
527 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700528
529 // Save this to cleanup when shutting down.
530 c->loopdev_paths.push_back(source);
531 }
532 if (!mount.verity.empty()) {
533 // Set this device up via dm-verity.
534 std::string dm_name;
535 base::FilePath dm_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700536 if (!DeviceMapperSetup(dm_source, mount.verity, &source, &dm_name))
537 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700538
539 // Save this to cleanup when shutting down.
540 c->device_mappers.push_back(dm_name);
541 }
542 if (mount.mount_in_ns) {
543 // We can mount this with minijail.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700544 if (minijail_mount_with_data(
545 c->jail.get(), source.value().c_str(),
546 mount.destination.value().c_str(), mount.type.c_str(), mount.flags,
547 mount.data.empty() ? nullptr : mount.data.c_str()) != 0) {
548 return false;
549 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700550 } else {
551 // Mount this externally and unmount it on exit.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700552 if (!MountExternal(source.value(), dest.value(), mount.type, mount.flags,
553 mount.data)) {
554 return false;
555 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700556 // Save this to unmount when shutting down.
557 c->ext_mounts.push_back(dest);
558 }
559
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700560 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700561}
562
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700563bool DoContainerMounts(struct container* c,
564 const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700565 UnmountExternalMounts(c);
566
567 // This will run in all the error cases.
568 base::ScopedClosureRunner teardown(base::Bind(
569 base::IgnoreResult(&UnmountExternalMounts), base::Unretained(c)));
570
571 for (const auto& mount : config->mounts) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700572 if (!DoContainerMount(c, config, mount))
573 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700574 }
575
576 // The mounts have been done successfully, no need to tear them down anymore.
577 ignore_result(teardown.Release());
578
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700579 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700580}
581
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700582bool ContainerCreateDevice(const struct container* c,
583 const struct container_config* config,
584 const Device& dev,
585 int minor) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700586 mode_t mode = dev.fs_permissions;
587 switch (dev.type) {
588 case 'b':
589 mode |= S_IFBLK;
590 break;
591 case 'c':
592 mode |= S_IFCHR;
593 break;
594 default:
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700595 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700596 }
597
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700598 int uid_userns;
599 if (!GetUsernsOutsideId(config->uid_map, dev.uid, &uid_userns))
600 return false;
601 int gid_userns;
602 if (!GetUsernsOutsideId(config->gid_map, dev.gid, &gid_userns))
603 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700604
605 base::FilePath path = GetPathInOuterNamespace(c->runfsroot, dev.path);
Luis Hector Chavez92278e82017-10-16 11:30:27 -0700606 if (!libcontainer::CreateDirectoryOwnedBy(path.DirName(), 0755, uid_userns,
607 gid_userns)) {
Luis Hector Chavez5d51abb2017-10-11 17:05:57 -0700608 PLOG(ERROR) << "Failed to create parent directory for " << path.value();
609 return false;
610 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700611 if (mknod(path.value().c_str(), mode, makedev(dev.major, minor)) != 0 &&
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700612 errno != EEXIST) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700613 PLOG(ERROR) << "Failed to mknod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700614 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700615 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700616 if (chown(path.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700617 PLOG(ERROR) << "Failed to chown " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700618 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700619 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700620 if (chmod(path.value().c_str(), dev.fs_permissions) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700621 PLOG(ERROR) << "Failed to chmod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700622 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700623 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700624
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700625 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700626}
627
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700628bool MountRunfs(struct container* c, const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700629 {
630 std::string runfs_template = base::StringPrintf(
631 "%s/%s_XXXXXX", c->rundir.value().c_str(), c->name.c_str());
632 // TODO(lhchavez): Replace this with base::CreateTemporaryDirInDir().
633 char* runfs_path = mkdtemp(const_cast<char*>(runfs_template.c_str()));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700634 if (!runfs_path) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700635 PLOG(ERROR) << "Failed to mkdtemp in " << c->rundir.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700636 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700637 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700638 c->runfs = base::FilePath(runfs_path);
639 }
640
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700641 int uid_userns;
642 if (!GetUsernsOutsideId(config->uid_map, config->uid, &uid_userns))
643 return false;
644 int gid_userns;
645 if (!GetUsernsOutsideId(config->gid_map, config->gid, &gid_userns))
646 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700647
648 // Make sure the container uid can access the rootfs.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700649 if (chmod(c->runfs.value().c_str(), 0700) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700650 PLOG(ERROR) << "Failed to chmod " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700651 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700652 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700653 if (chown(c->runfs.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700654 PLOG(ERROR) << "Failed to chown " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700655 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700656 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700657
658 c->runfsroot = c->runfs.Append("root");
659
660 constexpr mode_t kRootDirMode = 0660;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700661 if (mkdir(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700662 PLOG(ERROR) << "Failed to mkdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700663 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700664 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700665 if (chmod(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700666 PLOG(ERROR) << "Failed to chmod " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700667 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700668 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700669
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700670 if (mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
671 MS_BIND | (config->rootfs_mount_flags & MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700672 PLOG(ERROR) << "Failed to bind-mount " << config->rootfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700673 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700674 }
675
676 // MS_BIND ignores any flags passed to it (except MS_REC). We need a
677 // second call to mount() to actually set them.
678 if (config->rootfs_mount_flags &&
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700679 mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
680 (config->rootfs_mount_flags & ~MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700681 PLOG(ERROR) << "Failed to remount " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700682 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700683 }
684
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700685 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700686}
687
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700688bool CreateDeviceNodes(struct container* c,
689 const struct container_config* config,
690 pid_t container_pid) {
691 for (const auto& dev : config->devices) {
692 int minor = dev.minor;
693
694 if (dev.copy_minor) {
695 struct stat st_buff;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700696 if (stat(dev.path.value().c_str(), &st_buff) != 0)
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700697 continue;
698 minor = minor(st_buff.st_rdev);
699 }
700 if (minor < 0)
701 continue;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700702 if (!ContainerCreateDevice(c, config, dev, minor))
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700703 return false;
704 }
705
706 return true;
707}
708
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700709bool DeviceSetup(struct container* c, const struct container_config* config) {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700710 c->cgroup->DenyAllDevices();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700711
712 for (const auto& dev : config->cgroup_devices) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700713 if (!c->cgroup->AddDevice(dev.allow, dev.major, dev.minor, dev.read,
714 dev.write, dev.modify, dev.type)) {
715 return false;
716 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700717 }
718
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700719 for (const auto& loopdev_path : c->loopdev_paths) {
720 struct stat st;
721
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700722 if (stat(loopdev_path.value().c_str(), &st) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700723 PLOG(ERROR) << "Failed to stat " << loopdev_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700724 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700725 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700726 if (!c->cgroup->AddDevice(1, major(st.st_rdev), minor(st.st_rdev), 1, 0, 0,
727 'b')) {
728 return false;
729 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700730 }
731
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700732 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700733}
734
735int Setexeccon(void* payload) {
736 char* init_domain = reinterpret_cast<char*>(payload);
737 pid_t tid = syscall(SYS_gettid);
738
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700739 if (tid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700740 PLOG(ERROR) << "Failed to gettid";
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700741 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700742 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700743
744 std::string exec_path =
745 base::StringPrintf("/proc/self/task/%d/attr/exec", tid);
746
747 base::ScopedFD fd(open(exec_path.c_str(), O_WRONLY | O_CLOEXEC));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700748 if (!fd.is_valid()) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700749 PLOG(ERROR) << "Failed to open " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700750 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700751 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700752
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700753 if (!base::WriteFileDescriptor(fd.get(), init_domain, strlen(init_domain))) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700754 PLOG(ERROR) << "Failed to write the SELinux label to " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700755 return -errno;
756 }
757
758 return 0;
759}
760
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700761bool ContainerTeardown(struct container* c) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700762 UnmountExternalMounts(c);
763 if (!c->runfsroot.empty() && !c->runfs.empty()) {
764 /* |c->runfsroot| may have been mounted recursively. Thus use
765 * MNT_DETACH to "immediately disconnect the filesystem and all
766 * filesystems mounted below it from each other and from the
767 * mount table". Otherwise one would need to unmount every
768 * single dependent mount before unmounting |c->runfsroot|
769 * itself.
770 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700771 if (umount2(c->runfsroot.value().c_str(), MNT_DETACH) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700772 PLOG(ERROR) << "Failed to detach " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700773 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700774 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700775 if (rmdir(c->runfsroot.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700776 PLOG(ERROR) << "Failed to rmdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700777 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700778 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700779 c->runfsroot = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700780 }
781 if (!c->pid_file_path.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700782 if (unlink(c->pid_file_path.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700783 PLOG(ERROR) << "Failed to unlink " << c->pid_file_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700784 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700785 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700786 c->pid_file_path = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700787 }
788 if (!c->runfs.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700789 if (rmdir(c->runfs.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700790 PLOG(ERROR) << "Failed to rmdir " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700791 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700792 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700793 c->runfs = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700794 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700795 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700796}
797
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700798void CancelContainerStart(struct container* c) {
799 if (c->init_pid != -1)
800 container_kill(c);
801 ContainerTeardown(c);
802}
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700803
804} // namespace
805
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700806struct container_config* container_config_create() {
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700807 return new (std::nothrow) struct container_config();
Dylan Reid837c74a2016-01-22 17:25:21 -0800808}
809
// Frees a config obtained from container_config_create(). Passing nullptr is
// allowed and does nothing.
void container_config_destroy(struct container_config* c) {
  // Deleting a null pointer is a no-op, so no explicit check is needed.
  delete c;
}
815
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700816int container_config_config_root(struct container_config* c,
817 const char* config_root) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700818 c->config_root = base::FilePath(config_root);
819 return 0;
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500820}
821
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700822const char* container_config_get_config_root(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700823 return c->config_root.value().c_str();
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500824}
825
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700826int container_config_rootfs(struct container_config* c, const char* rootfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700827 c->rootfs = base::FilePath(rootfs);
828 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800829}
830
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700831const char* container_config_get_rootfs(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700832 return c->rootfs.value().c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700833}
834
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700835void container_config_rootfs_mount_flags(struct container_config* c,
836 unsigned long rootfs_mount_flags) {
837 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
838 * simply check against zero later. MS_BIND is also added to avoid
839 * re-mounting the original filesystem, since the rootfs is always
840 * bind-mounted.
841 */
842 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700843}
844
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700845unsigned long container_config_get_rootfs_mount_flags(
846 const struct container_config* c) {
847 return c->rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700848}
849
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700850int container_config_premounted_runfs(struct container_config* c,
851 const char* runfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700852 c->premounted_runfs = base::FilePath(runfs);
853 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700854}
855
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700856const char* container_config_get_premounted_runfs(
857 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700858 return c->premounted_runfs.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700859}
860
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700861int container_config_pid_file(struct container_config* c, const char* path) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700862 c->pid_file_path = base::FilePath(path);
863 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700864}
865
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700866const char* container_config_get_pid_file(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700867 return c->pid_file_path.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700868}
869
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700870int container_config_program_argv(struct container_config* c,
871 const char** argv,
872 size_t num_args) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700873 if (num_args < 1) {
874 errno = EINVAL;
875 return -1;
876 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700877 c->program_argv.clear();
878 c->program_argv.reserve(num_args);
879 for (size_t i = 0; i < num_args; ++i)
880 c->program_argv.emplace_back(argv[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700881 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800882}
883
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700884size_t container_config_get_num_program_args(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700885 return c->program_argv.size();
Dylan Reid11456722016-05-02 11:24:50 -0700886}
887
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700888const char* container_config_get_program_arg(const struct container_config* c,
889 size_t index) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700890 if (index >= c->program_argv.size())
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700891 return nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700892 return c->program_argv[index].c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700893}
894
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700895void container_config_uid(struct container_config* c, uid_t uid) {
896 c->uid = uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700897}
898
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700899uid_t container_config_get_uid(const struct container_config* c) {
900 return c->uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700901}
902
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700903int container_config_uid_map(struct container_config* c, const char* uid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700904 c->uid_map = uid_map;
905 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800906}
907
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700908void container_config_gid(struct container_config* c, gid_t gid) {
909 c->gid = gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700910}
911
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700912gid_t container_config_get_gid(const struct container_config* c) {
913 return c->gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700914}
915
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700916int container_config_gid_map(struct container_config* c, const char* gid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700917 c->gid_map = gid_map;
918 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800919}
920
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700921int container_config_alt_syscall_table(struct container_config* c,
922 const char* alt_syscall_table) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700923 c->alt_syscall_table = alt_syscall_table;
924 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800925}
926
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700927int container_config_add_rlimit(struct container_config* c,
928 int type,
929 uint32_t cur,
930 uint32_t max) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700931 if (c->num_rlimits >= kMaxRlimits) {
932 errno = ENOMEM;
933 return -1;
934 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700935 c->rlimits[c->num_rlimits].type = type;
936 c->rlimits[c->num_rlimits].cur = cur;
937 c->rlimits[c->num_rlimits].max = max;
938 c->num_rlimits++;
939 return 0;
Dylan Reid93fa4602017-06-06 13:39:31 -0700940}
941
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700942int container_config_add_mount(struct container_config* c,
943 const char* name,
944 const char* source,
945 const char* destination,
946 const char* type,
947 const char* data,
948 const char* verity,
949 int flags,
950 int uid,
951 int gid,
952 int mode,
953 int mount_in_ns,
954 int create,
955 int loopback) {
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700956 if (name == nullptr || source == nullptr || destination == nullptr ||
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700957 type == nullptr) {
958 errno = EINVAL;
959 return -1;
960 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800961
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700962 c->mounts.emplace_back(Mount{name,
963 base::FilePath(source),
964 base::FilePath(destination),
965 type,
966 data ? data : "",
967 verity ? verity : "",
968 flags,
969 uid,
970 gid,
971 mode,
972 mount_in_ns != 0,
973 create != 0,
974 loopback != 0});
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700975
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700976 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800977}
978
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700979int container_config_add_cgroup_device(struct container_config* c,
980 int allow,
981 char type,
982 int major,
983 int minor,
984 int read,
985 int write,
986 int modify) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700987 c->cgroup_devices.emplace_back(CgroupDevice{
988 allow != 0, type, major, minor, read != 0, write != 0, modify != 0});
Dylan Reid4843d6b2017-03-31 18:14:30 -0700989
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700990 return 0;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700991}
992
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700993int container_config_add_device(struct container_config* c,
994 char type,
995 const char* path,
996 int fs_permissions,
997 int major,
998 int minor,
999 int copy_minor,
1000 int uid,
1001 int gid,
1002 int read_allowed,
1003 int write_allowed,
1004 int modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001005 if (path == nullptr) {
1006 errno = EINVAL;
1007 return -1;
1008 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001009 /* If using a dynamic minor number, ensure that minor is -1. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001010 if (copy_minor && (minor != -1)) {
1011 errno = EINVAL;
1012 return -1;
1013 }
Dylan Reid355d5e42016-04-29 16:53:31 -07001014
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001015 if (read_allowed || write_allowed || modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001016 if (container_config_add_cgroup_device(c, 1, type, major, minor,
1017 read_allowed, write_allowed,
1018 modify_allowed) != 0) {
1019 errno = ENOMEM;
1020 return -1;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001021 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001022 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001023
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001024 c->devices.emplace_back(Device{
1025 type,
1026 base::FilePath(path),
1027 fs_permissions,
1028 major,
1029 minor,
1030 copy_minor != 0,
1031 uid,
1032 gid,
1033 });
1034
1035 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001036}
1037
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001038int container_config_run_setfiles(struct container_config* c,
1039 const char* setfiles_cmd) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001040 c->run_setfiles = setfiles_cmd;
1041 return 0;
Dylan Reid2bd9ea92016-04-07 20:57:47 -07001042}
Dylan Reid837c74a2016-01-22 17:25:21 -08001043
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001044const char* container_config_get_run_setfiles(
1045 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001046 return c->run_setfiles.c_str();
Dylan Reid11456722016-05-02 11:24:50 -07001047}
1048
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001049int container_config_set_cpu_shares(struct container_config* c, int shares) {
1050 /* CPU shares must be 2 or higher. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001051 if (shares < 2) {
1052 errno = EINVAL;
1053 return -1;
1054 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001055
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001056 c->cpu_cgparams.shares = shares;
1057 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001058}
1059
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001060int container_config_set_cpu_cfs_params(struct container_config* c,
1061 int quota,
1062 int period) {
1063 /*
1064 * quota could be set higher than period to utilize more than one CPU.
1065 * quota could also be set as -1 to indicate the cgroup does not adhere
1066 * to any CPU time restrictions.
1067 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001068 if (quota <= 0 && quota != -1) {
1069 errno = EINVAL;
1070 return -1;
1071 }
1072 if (period <= 0) {
1073 errno = EINVAL;
1074 return -1;
1075 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001076
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001077 c->cpu_cgparams.quota = quota;
1078 c->cpu_cgparams.period = period;
1079 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001080}
1081
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001082int container_config_set_cpu_rt_params(struct container_config* c,
1083 int rt_runtime,
1084 int rt_period) {
1085 /*
1086 * rt_runtime could be set as 0 to prevent the cgroup from using
1087 * realtime CPU.
1088 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001089 if (rt_runtime < 0 || rt_runtime >= rt_period) {
1090 errno = EINVAL;
1091 return -1;
1092 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001093
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001094 c->cpu_cgparams.rt_runtime = rt_runtime;
1095 c->cpu_cgparams.rt_period = rt_period;
1096 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001097}
1098
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001099int container_config_get_cpu_shares(struct container_config* c) {
1100 return c->cpu_cgparams.shares;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001101}
1102
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001103int container_config_get_cpu_quota(struct container_config* c) {
1104 return c->cpu_cgparams.quota;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001105}
1106
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001107int container_config_get_cpu_period(struct container_config* c) {
1108 return c->cpu_cgparams.period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001109}
1110
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001111int container_config_get_cpu_rt_runtime(struct container_config* c) {
1112 return c->cpu_cgparams.rt_runtime;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001113}
1114
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001115int container_config_get_cpu_rt_period(struct container_config* c) {
1116 return c->cpu_cgparams.rt_period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001117}
1118
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001119int container_config_set_cgroup_parent(struct container_config* c,
1120 const char* parent,
1121 uid_t cgroup_owner,
1122 gid_t cgroup_group) {
1123 c->cgroup_owner = cgroup_owner;
1124 c->cgroup_group = cgroup_group;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001125 c->cgroup_parent = base::FilePath(parent);
1126 return 0;
Dylan Reid9e724af2016-07-21 09:58:07 -07001127}
1128
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001129const char* container_config_get_cgroup_parent(struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001130 return c->cgroup_parent.value().c_str();
Dylan Reid9e724af2016-07-21 09:58:07 -07001131}
1132
Stephen Barber771653f2017-10-04 23:48:57 -07001133int container_config_namespaces(struct container_config* c,
1134 const char** namespaces,
1135 size_t num_ns) {
1136 if (num_ns < 1)
1137 return -EINVAL;
1138 c->namespaces.clear();
1139 for (size_t i = 0; i < num_ns; ++i)
1140 c->namespaces.emplace(namespaces[i]);
1141 return 0;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001142}
1143
Stephen Barber771653f2017-10-04 23:48:57 -07001144size_t container_config_get_num_namespaces(const struct container_config* c) {
1145 return c->namespaces.size();
1146}
1147
1148bool container_config_has_namespace(const struct container_config* c,
1149 const char* ns) {
1150 return c->namespaces.find(ns) != c->namespaces.end();
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001151}
1152
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001153void container_config_keep_fds_open(struct container_config* c) {
yusukesf125f332017-12-08 13:45:15 -08001154 c->keep_fds_open = true;
Dylan Reidc4335842016-11-11 10:24:52 -08001155}
1156
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001157void container_config_set_capmask(struct container_config* c,
1158 uint64_t capmask,
1159 int ambient) {
yusukesf125f332017-12-08 13:45:15 -08001160 c->use_capmask = true;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001161 c->capmask = capmask;
1162 c->use_capmask_ambient = ambient;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001163}
1164
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001165void container_config_set_securebits_skip_mask(struct container_config* c,
1166 uint64_t securebits_skip_mask) {
1167 c->securebits_skip_mask = securebits_skip_mask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001168}
1169
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001170void container_config_set_run_as_init(struct container_config* c,
1171 int run_as_init) {
1172 c->do_init = !run_as_init;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001173}
1174
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001175int container_config_set_selinux_context(struct container_config* c,
1176 const char* context) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001177 if (!context) {
1178 errno = EINVAL;
1179 return -1;
1180 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001181 c->selinux_context = context;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001182 return 0;
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001183}
1184
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001185void container_config_set_pre_execve_hook(struct container_config* c,
1186 int (*hook)(void*),
1187 void* payload) {
1188 c->pre_start_hook = hook;
1189 c->pre_start_hook_payload = payload;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001190}
1191
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001192void container_config_add_hook(struct container_config* c,
1193 minijail_hook_event_t event,
1194 libcontainer::HookCallback callback) {
1195 auto it = c->hooks.insert(
1196 std::make_pair(event, std::vector<libcontainer::HookCallback>()));
1197 it.first->second.emplace_back(std::move(callback));
1198}
1199
Luis Hector Chaveze03926a2017-09-28 17:28:49 -07001200int container_config_add_hook(struct container_config* c,
1201 minijail_hook_event_t event,
1202 const char* filename,
1203 const char** argv,
1204 size_t num_args,
1205 int* pstdin_fd,
1206 int* pstdout_fd,
1207 int* pstderr_fd) {
1208 std::vector<std::string> args;
1209 args.reserve(num_args);
1210 for (size_t i = 0; i < num_args; ++i)
1211 args.emplace_back(argv[i]);
1212
1213 // First element of the array belongs to the parent and the second one belongs
1214 // to the child.
1215 base::ScopedFD stdin_fds[2], stdout_fds[2], stderr_fds[2];
1216 if (pstdin_fd) {
1217 if (!libcontainer::Pipe2(&stdin_fds[1], &stdin_fds[0], 0))
1218 return -1;
1219 }
1220 if (pstdout_fd) {
1221 if (!libcontainer::Pipe2(&stdout_fds[0], &stdout_fds[0], 0))
1222 return -1;
1223 }
1224 if (pstderr_fd) {
1225 if (!libcontainer::Pipe2(&stderr_fds[0], &stderr_fds[0], 0))
1226 return -1;
1227 }
1228
1229 // After this point the call has been successful, so we can now commit to
1230 // whatever pipes we have opened.
1231 if (pstdin_fd) {
1232 *pstdin_fd = stdin_fds[0].release();
1233 c->inherited_fds.emplace_back(stdin_fds[1].get());
1234 }
1235 if (pstdout_fd) {
1236 *pstdout_fd = stdout_fds[0].release();
1237 c->inherited_fds.emplace_back(stdout_fds[1].get());
1238 }
1239 if (pstderr_fd) {
1240 *pstderr_fd = stderr_fds[0].release();
1241 c->inherited_fds.emplace_back(stderr_fds[1].get());
1242 }
1243 container_config_add_hook(
1244 c, event,
1245 libcontainer::CreateExecveCallback(
1246 base::FilePath(filename), args, std::move(stdin_fds[1]),
1247 std::move(stdout_fds[1]), std::move(stderr_fds[1])));
1248 return 0;
1249}
1250
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001251int container_config_inherit_fds(struct container_config* c,
1252 int* inherited_fds,
1253 size_t inherited_fd_count) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001254 if (!c->inherited_fds.empty()) {
1255 errno = EINVAL;
1256 return -1;
1257 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001258 for (size_t i = 0; i < inherited_fd_count; ++i)
1259 c->inherited_fds.emplace_back(inherited_fds[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001260 return 0;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001261}
1262
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001263struct container* container_new(const char* name, const char* rundir) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001264 struct container* c = new (std::nothrow) container();
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001265 if (!c)
1266 return nullptr;
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001267 c->rundir = base::FilePath(rundir);
1268 c->name = name;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001269 return c;
Dylan Reid837c74a2016-01-22 17:25:21 -08001270}
1271
// Frees a container obtained from container_new(). Only the object itself is
// deleted here; passing nullptr does nothing.
void container_destroy(struct container* c) {
  struct container* doomed = c;
  delete doomed;
}
1275
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001276int container_start(struct container* c,
1277 const struct container_config* config) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001278 if (!c) {
1279 errno = EINVAL;
1280 return -1;
1281 }
1282 if (!config) {
1283 errno = EINVAL;
1284 return -1;
1285 }
1286 if (config->program_argv.empty()) {
1287 errno = EINVAL;
1288 return -1;
1289 }
Dylan Reide040c6b2016-05-02 18:49:02 -07001290
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001291 // This will run in all the error cases.
1292 base::ScopedClosureRunner teardown(
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -07001293 base::Bind(&CancelContainerStart, base::Unretained(c)));
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001294
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001295 if (!config->config_root.empty())
1296 c->config_root = config->config_root;
1297 if (!config->premounted_runfs.empty()) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001298 c->runfs.clear();
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001299 c->runfsroot = config->premounted_runfs;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001300 } else {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001301 if (!MountRunfs(c, config))
1302 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001303 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001304
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001305 c->jail.reset(minijail_new());
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001306 if (!c->jail) {
1307 errno = ENOMEM;
1308 return -1;
1309 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001310
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001311 if (!DoContainerMounts(c, config))
1312 return -1;
Dylan Reid837c74a2016-01-22 17:25:21 -08001313
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001314 int cgroup_uid;
1315 if (!GetUsernsOutsideId(config->uid_map, config->cgroup_owner, &cgroup_uid))
1316 return -1;
1317 int cgroup_gid;
1318 if (!GetUsernsOutsideId(config->gid_map, config->cgroup_group, &cgroup_gid))
1319 return -1;
Stephen Barber1a398c72017-01-23 12:39:44 -08001320
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001321 c->cgroup = libcontainer::Cgroup::Create(c->name,
1322 base::FilePath("/sys/fs/cgroup"),
1323 config->cgroup_parent,
1324 cgroup_uid,
1325 cgroup_gid);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001326 if (!c->cgroup)
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001327 return -1;
Dylan Reida9966422016-07-21 10:11:34 -07001328
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001329 // Must be root to modify device cgroup or mknod.
1330 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
1331 hook_callbacks;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001332 if (getuid() == 0) {
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001333 if (!config->devices.empty()) {
1334 // Create the devices in the mount namespace.
1335 auto it = hook_callbacks.insert(
1336 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1337 std::vector<libcontainer::HookCallback>()));
1338 it.first->second.emplace_back(
1339 libcontainer::AdaptCallbackToRunInNamespaces(
1340 base::Bind(&CreateDeviceNodes, base::Unretained(c),
1341 base::Unretained(config)),
1342 {CLONE_NEWNS}));
1343 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001344 if (!DeviceSetup(c, config))
1345 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001346 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001347
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001348 // Potentially run setfiles on mounts configured outside of the jail.
1349 if (!config->run_setfiles.empty()) {
1350 const base::FilePath kDataPath("/data");
1351 const base::FilePath kCachePath("/cache");
1352 std::vector<base::FilePath> destinations;
1353 for (const auto& mnt : config->mounts) {
1354 if (mnt.mount_in_ns)
1355 continue;
1356 if (mnt.flags & MS_RDONLY)
1357 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001358
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001359 // A hack to avoid setfiles on /data and /cache.
1360 if (mnt.destination == kDataPath || mnt.destination == kCachePath)
1361 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001362
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001363 destinations.emplace_back(
1364 GetPathInOuterNamespace(c->runfsroot, mnt.destination));
1365 }
1366
1367 if (!destinations.empty()) {
1368 auto it = hook_callbacks.insert(
1369 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1370 std::vector<libcontainer::HookCallback>()));
1371 it.first->second.emplace_back(
1372 libcontainer::AdaptCallbackToRunInNamespaces(
1373 base::Bind(&RunSetfilesCommand, base::Unretained(c),
1374 base::Unretained(config), destinations),
1375 {CLONE_NEWNS}));
1376 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001377 }
Dylan Reidd7229582016-04-27 17:08:40 -07001378
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001379 /* Setup CPU cgroup params. */
1380 if (config->cpu_cgparams.shares) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001381 if (!c->cgroup->SetCpuShares(config->cpu_cgparams.shares))
1382 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001383 }
1384 if (config->cpu_cgparams.period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001385 if (!c->cgroup->SetCpuQuota(config->cpu_cgparams.quota))
1386 return -1;
1387 if (!c->cgroup->SetCpuPeriod(config->cpu_cgparams.period))
1388 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001389 }
1390 if (config->cpu_cgparams.rt_period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001391 if (!c->cgroup->SetCpuRtRuntime(config->cpu_cgparams.rt_runtime))
1392 return -1;
1393 if (!c->cgroup->SetCpuRtPeriod(config->cpu_cgparams.rt_period))
1394 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001395 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001396
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001397 /* Setup and start the container with libminijail. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001398 if (!config->pid_file_path.empty())
1399 c->pid_file_path = config->pid_file_path;
1400 else if (!c->runfs.empty())
1401 c->pid_file_path = c->runfs.Append("container.pid");
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001402
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001403 if (!c->pid_file_path.empty())
1404 minijail_write_pid_file(c->jail.get(), c->pid_file_path.value().c_str());
1405 minijail_reset_signal_mask(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001406
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001407 /* Setup container namespaces. */
Stephen Barber771653f2017-10-04 23:48:57 -07001408 if (container_config_has_namespace(config, "ipc"))
1409 minijail_namespace_ipc(c->jail.get());
1410 if (container_config_has_namespace(config, "mount"))
1411 minijail_namespace_vfs(c->jail.get());
1412 if (container_config_has_namespace(config, "network"))
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001413 minijail_namespace_net(c->jail.get());
Stephen Barber771653f2017-10-04 23:48:57 -07001414 if (container_config_has_namespace(config, "pid"))
1415 minijail_namespace_pids(c->jail.get());
1416
1417 if (container_config_has_namespace(config, "user")) {
1418 minijail_namespace_user(c->jail.get());
1419 if (minijail_uidmap(c->jail.get(), config->uid_map.c_str()) != 0)
1420 return -1;
1421 if (minijail_gidmap(c->jail.get(), config->gid_map.c_str()) != 0)
1422 return -1;
1423 }
1424
1425 if (container_config_has_namespace(config, "cgroup"))
1426 minijail_namespace_cgroups(c->jail.get());
1427
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001428 if (getuid() != 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001429 minijail_namespace_user_disable_setgroups(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001430
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001431 /* Set the UID/GID inside the container if not 0. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001432 if (!GetUsernsOutsideId(config->uid_map, config->uid, nullptr))
1433 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001434 else if (config->uid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001435 minijail_change_uid(c->jail.get(), config->uid);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001436 if (!GetUsernsOutsideId(config->gid_map, config->gid, nullptr))
1437 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001438 else if (config->gid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001439 minijail_change_gid(c->jail.get(), config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001440
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001441 if (minijail_enter_pivot_root(c->jail.get(), c->runfsroot.value().c_str()) !=
1442 0) {
1443 return -1;
1444 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001445
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001446 // Add the cgroups configured above.
1447 for (int32_t i = 0; i < libcontainer::Cgroup::Type::NUM_TYPES; i++) {
1448 if (c->cgroup->has_tasks_path(i)) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001449 if (minijail_add_to_cgroup(
1450 c->jail.get(), c->cgroup->tasks_path(i).value().c_str()) != 0) {
1451 return -1;
1452 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001453 }
1454 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001455
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001456 if (!config->alt_syscall_table.empty())
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001457 minijail_use_alt_syscall(c->jail.get(), config->alt_syscall_table.c_str());
Dylan Reid837c74a2016-01-22 17:25:21 -08001458
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001459 for (int i = 0; i < config->num_rlimits; i++) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001460 const Rlimit& lim = config->rlimits[i];
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001461 if (minijail_rlimit(c->jail.get(), lim.type, lim.cur, lim.max) != 0)
1462 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001463 }
Dylan Reid93fa4602017-06-06 13:39:31 -07001464
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001465 if (!config->selinux_context.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001466 if (minijail_add_hook(c->jail.get(), &Setexeccon,
1467 const_cast<char*>(config->selinux_context.c_str()),
1468 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1469 return -1;
1470 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001471 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001472
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001473 if (config->pre_start_hook) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001474 if (minijail_add_hook(c->jail.get(), config->pre_start_hook,
1475 config->pre_start_hook_payload,
1476 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1477 return -1;
1478 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001479 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001480
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001481 // Now that all pre-requisite hooks are installed, copy the ones in the
1482 // container_config object in the correct order.
1483 for (const auto& config_hook : config->hooks) {
1484 auto it = hook_callbacks.insert(std::make_pair(
1485 config_hook.first, std::vector<libcontainer::HookCallback>()));
1486 it.first->second.insert(it.first->second.end(), config_hook.second.begin(),
1487 config_hook.second.end());
1488 }
1489
1490 c->hook_states.clear();
1491 // Reserve enough memory to hold all the hooks, so that their addresses do not
1492 // get invalidated by reallocation.
1493 c->hook_states.reserve(MINIJAIL_HOOK_EVENT_MAX);
1494 for (minijail_hook_event_t event : {MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1495 MINIJAIL_HOOK_EVENT_PRE_DROP_CAPS,
1496 MINIJAIL_HOOK_EVENT_PRE_EXECVE}) {
1497 const auto& it = hook_callbacks.find(event);
1498 if (it == hook_callbacks.end())
1499 continue;
1500 c->hook_states.emplace_back(
1501 std::make_pair(libcontainer::HookState(), it->second));
1502 if (!c->hook_states.back().first.InstallHook(c->jail.get(), event))
1503 return -1;
1504 }
1505
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001506 for (int fd : config->inherited_fds) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001507 if (minijail_preserve_fd(c->jail.get(), fd, fd) != 0)
1508 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001509 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001510
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001511 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001512 minijail_skip_remount_private(c->jail.get());
Dylan Reid3da683b2016-04-05 03:35:35 -07001513
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001514 if (!config->keep_fds_open)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001515 minijail_close_open_fds(c->jail.get());
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001516
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001517 if (config->use_capmask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001518 minijail_use_caps(c->jail.get(), config->capmask);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001519 if (config->use_capmask_ambient)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001520 minijail_set_ambient_caps(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001521 if (config->securebits_skip_mask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001522 minijail_skip_setting_securebits(c->jail.get(),
1523 config->securebits_skip_mask);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001524 }
1525 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001526
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001527 if (!config->do_init)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001528 minijail_run_as_init(c->jail.get());
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001529
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001530 std::vector<char*> argv_cstr;
1531 argv_cstr.reserve(config->program_argv.size() + 1);
1532 for (const auto& arg : config->program_argv)
1533 argv_cstr.emplace_back(const_cast<char*>(arg.c_str()));
1534 argv_cstr.emplace_back(nullptr);
1535
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001536 if (minijail_run_pid_pipes_no_preload(c->jail.get(), argv_cstr[0],
1537 argv_cstr.data(), &c->init_pid, nullptr,
1538 nullptr, nullptr) != 0) {
1539 return -1;
1540 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001541
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001542 // |hook_states| is already sorted in the correct order.
1543 for (auto& hook_state : c->hook_states) {
1544 if (!hook_state.first.WaitForHookAndRun(hook_state.second, c->init_pid))
1545 return -1;
1546 }
1547
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001548 // The container has started successfully, no need to tear it down anymore.
1549 ignore_result(teardown.Release());
1550 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001551}
1552
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001553const char* container_root(struct container* c) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001554 return c->runfs.value().c_str();
Dylan Reid837c74a2016-01-22 17:25:21 -08001555}
1556
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001557int container_pid(struct container* c) {
1558 return c->init_pid;
Dylan Reid837c74a2016-01-22 17:25:21 -08001559}
1560
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001561int container_wait(struct container* c) {
1562 int rc;
Dylan Reidcf745c52016-04-22 10:18:03 -07001563
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001564 do {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001565 rc = minijail_wait(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001566 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001567
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001568 // If the process had already been reaped, still perform teardown.
1569 if (rc == -ECHILD || rc >= 0) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001570 if (!ContainerTeardown(c))
1571 rc = -errno;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001572 }
1573 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001574}
1575
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001576int container_kill(struct container* c) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001577 if (kill(c->init_pid, SIGKILL) != 0 && errno != ESRCH) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -07001578 PLOG(ERROR) << "Failed to kill " << c->init_pid;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001579 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -07001580 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001581 return container_wait(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001582}
yusukesbbc37a72017-11-21 09:51:54 -08001583
// Streams |c| into a string stream and returns the resulting text as a newly
// allocated C string.
//
// The result comes from strdup(), so the caller owns it and must release it
// with free(). strdup() returns nullptr if the allocation fails.
// NOTE(review): the text produced depends on which operator<< overload is
// selected for a container_config pointer -- confirm one is declared earlier
// in this file, otherwise only the pointer value is printed.
char* container_config_dump(struct container_config* c) {
  std::stringstream buffer;
  buffer << c;
  const std::string text = buffer.str();
  return strdup(text.c_str());
}