blob: 6d3ab563fff9a75b42fa937a3ad3d662b6a1a7a0 [file] [log] [blame]
Luis Hector Chavez81efb332017-09-18 14:01:29 -07001// Copyright 2016 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
Dylan Reid837c74a2016-01-22 17:25:21 -08004
Dylan Reid837c74a2016-01-22 17:25:21 -08005#include <errno.h>
6#include <fcntl.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08007#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07008#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08009#include <stdlib.h>
10#include <string.h>
11#include <sys/mount.h>
12#include <sys/stat.h>
13#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070014#include <sys/wait.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070015#include <syscall.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <unistd.h>
17
yusukes32622542018-01-05 18:59:52 -080018#include <algorithm>
Luis Hector Chavez644d2042017-09-19 18:56:44 -070019#include <map>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070020#include <memory>
yusukesbbc37a72017-11-21 09:51:54 -080021#include <ostream>
Stephen Barber771653f2017-10-04 23:48:57 -070022#include <set>
yusukesbbc37a72017-11-21 09:51:54 -080023#include <sstream>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070024#include <string>
yusukes32622542018-01-05 18:59:52 -080025#include <tuple>
Luis Hector Chavez644d2042017-09-19 18:56:44 -070026#include <utility>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070027#include <vector>
28
29#include <base/bind.h>
30#include <base/bind_helpers.h>
31#include <base/callback_helpers.h>
32#include <base/files/file_path.h>
33#include <base/files/file_util.h>
34#include <base/files/scoped_file.h>
Luis Hector Chavez835d39e2017-09-19 15:16:31 -070035#include <base/logging.h>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070036#include <base/macros.h>
37#include <base/strings/string_util.h>
38#include <base/strings/stringprintf.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070039#include <libminijail.h>
Luis Hector Chavez626f5c82017-09-18 11:19:32 -070040#include <scoped_minijail.h>
Mike Frysinger412dbd22017-01-06 01:50:34 -050041
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -070042#include "libcontainer/cgroup.h"
Luis Hector Chavez644d2042017-09-19 18:56:44 -070043#include "libcontainer/config.h"
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070044#include "libcontainer/libcontainer.h"
Luis Hector Chavez81efb332017-09-18 14:01:29 -070045#include "libcontainer/libcontainer_util.h"
Yusuke Sato91f11f02016-12-02 16:15:13 -080046
// Wraps |s| in double quotes and yields a std::string; used by the dump
// helpers in this file to make string-valued fields (including empty ones)
// visible in the output.
#define QUOTE(s) (std::string("\"") + std::string(s) + "\"")
48
Luis Hector Chavez5381d002017-09-16 12:54:24 -070049namespace {
50
Luis Hector Chavez81efb332017-09-18 14:01:29 -070051using libcontainer::DeviceMapperDetach;
52using libcontainer::DeviceMapperSetup;
53using libcontainer::GetUsernsOutsideId;
54using libcontainer::LoopdevDetach;
55using libcontainer::LoopdevSetup;
56using libcontainer::MakeDir;
57using libcontainer::MountExternal;
58using libcontainer::TouchFile;
Mike Frysinger412dbd22017-01-06 01:50:34 -050059
// Capacity of the fixed-size rlimit table kept in container_config.
// Linux defines 15 at the time of writing.
constexpr size_t kMaxRlimits = 32;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -070061
Luis Hector Chavez5381d002017-09-16 12:54:24 -070062struct Mount {
63 std::string name;
64 base::FilePath source;
65 base::FilePath destination;
66 std::string type;
67 std::string data;
68 std::string verity;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070069 int flags;
70 int uid;
71 int gid;
72 int mode;
Luis Hector Chavez5381d002017-09-16 12:54:24 -070073
74 // True if mount should happen in new vfs ns.
75 bool mount_in_ns;
76
77 // True if target should be created if it doesn't exist.
78 bool create;
79
80 // True if target should be mounted via loopback.
81 bool loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -080082};
83
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070084struct Device {
85 // 'c' or 'b' for char or block
86 char type;
87 base::FilePath path;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070088 int fs_permissions;
89 int major;
90 int minor;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070091
Stephen Barber7bae6642017-11-30 10:47:12 -080092 // Copy the major from existing node, ignores |major|.
93 bool copy_major;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070094 // Copy the minor from existing node, ignores |minor|.
95 bool copy_minor;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070096 int uid;
97 int gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -070098};
99
// One rule for the device cgroup of a container.
struct CgroupDevice {
  // Whether the rule grants (true) or denies (false) access.
  bool allow;
  // Device kind the rule applies to.
  char type;

  // Device numbers the rule applies to; -1 for either one means "all".
  int major;
  int minor;

  // Access kinds covered by the rule.
  bool read;
  bool write;
  bool modify;
};
112
// CPU cgroup parameters for a container.
struct CpuCgroup {
  // Relative CPU share weight.
  int shares;
  // Bandwidth quota and the period it applies to.
  int quota;
  int period;
  // Realtime runtime budget and the period it applies to.
  int rt_runtime;
  int rt_period;
};
120
// A single resource limit for the contained process.
struct Rlimit {
  // Which resource the limit applies to.
  int type;
  // Current (soft) and maximum (hard) values for that resource.
  rlim_t cur;
  rlim_t max;
};
126
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700127} // namespace
128
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700129// Structure that configures how the container is run.
Dylan Reid837c74a2016-01-22 17:25:21 -0800130struct container_config {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700131 // Path to the root of the container itself.
132 base::FilePath config_root;
133
134 // Path to the root of the container's filesystem.
135 base::FilePath rootfs;
136
137 // Flags that will be passed to mount() for the rootfs.
yusukesb7b9a042017-12-08 13:14:25 -0800138 unsigned long rootfs_mount_flags = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700139
140 // Path to where the container will be run.
141 base::FilePath premounted_runfs;
142
143 // Path to the file where the pid should be written.
144 base::FilePath pid_file_path;
145
146 // The program to run and args, e.g. "/sbin/init".
147 std::vector<std::string> program_argv;
148
149 // The uid the container will run as.
yusukesb7b9a042017-12-08 13:14:25 -0800150 uid_t uid = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700151
152 // Mapping of UIDs in the container, e.g. "0 100000 1024"
153 std::string uid_map;
154
155 // The gid the container will run as.
yusukesb7b9a042017-12-08 13:14:25 -0800156 gid_t gid = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700157
158 // Mapping of GIDs in the container, e.g. "0 100000 1024"
159 std::string gid_map;
160
161 // Syscall table to use or nullptr if none.
162 std::string alt_syscall_table;
163
164 // Filesystems to mount in the new namespace.
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700165 std::vector<Mount> mounts;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700166
Stephen Barber771653f2017-10-04 23:48:57 -0700167 // Namespaces that should be used for the container.
168 std::set<std::string> namespaces;
169
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700170 // Device nodes to create.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700171 std::vector<Device> devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700172
173 // Device node cgroup permissions.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700174 std::vector<CgroupDevice> cgroup_devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700175
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700176 // CPU cgroup params.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700177 CpuCgroup cpu_cgparams;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700178
179 // Parent dir for cgroup creation
180 base::FilePath cgroup_parent;
181
182 // uid to own the created cgroups
yusukesb7b9a042017-12-08 13:14:25 -0800183 uid_t cgroup_owner = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700184
185 // gid to own the created cgroups
yusukesb7b9a042017-12-08 13:14:25 -0800186 gid_t cgroup_group = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700187
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700188 // Allow the child process to keep open FDs (for stdin/out/err).
yusukesf125f332017-12-08 13:45:15 -0800189 bool keep_fds_open = false;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700190
191 // Array of rlimits for the contained process.
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700192 Rlimit rlimits[kMaxRlimits];
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700193
194 // The number of elements in `rlimits`.
yusukesb7b9a042017-12-08 13:14:25 -0800195 int num_rlimits = 0;
yusukesf125f332017-12-08 13:45:15 -0800196 bool use_capmask = false;
197 bool use_capmask_ambient = false;
yusukesb7b9a042017-12-08 13:14:25 -0800198 uint64_t capmask = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700199
200 // The mask of securebits to skip when restricting caps.
yusukesb7b9a042017-12-08 13:14:25 -0800201 uint64_t securebits_skip_mask = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700202
203 // Whether the container needs an extra process to be run as init.
yusukesf125f332017-12-08 13:45:15 -0800204 bool do_init = false;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700205
206 // The SELinux context name the container will run under.
207 std::string selinux_context;
208
209 // A function pointer to be called prior to calling execve(2).
yusukesb7b9a042017-12-08 13:14:25 -0800210 minijail_hook_t pre_start_hook = nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700211
212 // Parameter that will be passed to pre_start_hook().
yusukesb7b9a042017-12-08 13:14:25 -0800213 void* pre_start_hook_payload = nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700214
Luis Hector Chaveze03926a2017-09-28 17:28:49 -0700215 // A list of file descriptors to inherit.
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700216 std::vector<int> inherited_fds;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700217
218 // A list of hooks that will be called upon minijail reaching various states
219 // of execution.
220 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
221 hooks;
Dylan Reid837c74a2016-01-22 17:25:21 -0800222};
223
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700224// Container manipulation
225struct container {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700226 std::unique_ptr<libcontainer::Cgroup> cgroup;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700227 ScopedMinijail jail;
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700228 pid_t init_pid = -1;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700229 base::FilePath config_root;
230 base::FilePath runfs;
231 base::FilePath rundir;
232 base::FilePath runfsroot;
233 base::FilePath pid_file_path;
234
235 // Mounts made outside of the minijail.
236 std::vector<base::FilePath> ext_mounts;
237 std::vector<base::FilePath> loopdev_paths;
238 std::vector<std::string> device_mappers;
239 std::string name;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700240
241 std::vector<std::pair<libcontainer::HookState,
242 std::vector<libcontainer::HookCallback>>>
243 hook_states;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700244};
245
246namespace {
247
yusukes4d955472018-01-17 16:41:32 -0800248std::string GetMountFlagsAsString(int flags) {
249#define CHECK_MOUNT_FLAG(flag) \
250 do { \
251 if (flags & flag) \
252 result.push_back(#flag); \
253 } while (false)
254
255 std::vector<std::string> result;
256 CHECK_MOUNT_FLAG(MS_RDONLY);
257 CHECK_MOUNT_FLAG(MS_NOSUID);
258 CHECK_MOUNT_FLAG(MS_NODEV);
259 CHECK_MOUNT_FLAG(MS_NOEXEC);
260 CHECK_MOUNT_FLAG(MS_SYNCHRONOUS);
261 CHECK_MOUNT_FLAG(MS_REMOUNT);
262 CHECK_MOUNT_FLAG(MS_MANDLOCK);
263 CHECK_MOUNT_FLAG(MS_DIRSYNC);
264 CHECK_MOUNT_FLAG(MS_NOATIME);
265 CHECK_MOUNT_FLAG(MS_NODIRATIME);
266 CHECK_MOUNT_FLAG(MS_BIND);
267 CHECK_MOUNT_FLAG(MS_MOVE);
268 CHECK_MOUNT_FLAG(MS_REC);
269 CHECK_MOUNT_FLAG(MS_SILENT);
270 CHECK_MOUNT_FLAG(MS_POSIXACL);
271 CHECK_MOUNT_FLAG(MS_UNBINDABLE);
272 CHECK_MOUNT_FLAG(MS_PRIVATE);
273 CHECK_MOUNT_FLAG(MS_SLAVE);
274 CHECK_MOUNT_FLAG(MS_SHARED);
275 return result.empty() ? "no flags" : base::JoinString(result, " | ");
276
277#undef CHECK_MOUNT_FLAG
278}
279
yusukesbbc37a72017-11-21 09:51:54 -0800280std::ostream& operator<<(std::ostream& stream, const Mount& mount) {
281 stream << "mount:" << std::endl
282 << " name: " << QUOTE(mount.name) << std::endl
283 << " source: " << QUOTE(mount.source.value()) << std::endl
284 << " destination: " << QUOTE(mount.destination.value()) << std::endl
285 << " type: " << QUOTE(mount.type) << std::endl
286 << " data: " << QUOTE(mount.data) << std::endl
287 << " verity: " << QUOTE(mount.verity) << std::endl
yusukes4d955472018-01-17 16:41:32 -0800288 << " flags: 0x" << std::hex << mount.flags << std::dec << " ("
289 << GetMountFlagsAsString(mount.flags) << ")" << std::endl
yusukesbbc37a72017-11-21 09:51:54 -0800290 << " uid: " << mount.uid << std::endl
291 << " gid: " << mount.gid << std::endl
292 << " mode: 0" << std::oct << mount.mode << std::dec << std::endl
293 << " mount_in_ns: " << mount.mount_in_ns << std::endl
294 << " create: " << mount.create << std::endl
295 << " loopback: " << mount.loopback << std::endl;
296
297 return stream;
298}
299
300std::ostream& operator<<(std::ostream& stream, const Device& device) {
301 stream << "device:" << std::endl
302 << " type: " << device.type << std::endl
303 << " path: " << QUOTE(device.path.value()) << std::endl
304 << " fs_permissions: 0" << std::oct << device.fs_permissions
305 << std::dec << std::endl
306 << " major: " << device.major << std::endl
307 << " minor: " << device.minor << std::endl
308 << " copy_minor: " << device.copy_minor << std::endl
309 << " uid: " << device.uid << std::endl
310 << " gid: " << device.gid << std::endl;
311
312 return stream;
313}
314
315std::ostream& operator<<(std::ostream& stream,
316 const CgroupDevice& cgroup_device) {
317 stream << "cgroup_device:" << std::endl
318 << " allow: " << cgroup_device.allow << std::endl
319 << " type: " << cgroup_device.type << std::endl
320 << " major: " << cgroup_device.major << std::endl
321 << " minor: " << cgroup_device.minor << std::endl
322 << " read: " << cgroup_device.read << std::endl
323 << " write: " << cgroup_device.write << std::endl
324 << " modify: " << cgroup_device.modify << std::endl;
325
326 return stream;
327}
328
329std::ostream& operator<<(std::ostream& stream, const CpuCgroup& cpu_cgroup) {
330 stream << "cpu_cgroup:" << std::endl
331 << " shares: " << cpu_cgroup.shares << std::endl
332 << " quota: " << cpu_cgroup.quota << std::endl
333 << " period: " << cpu_cgroup.period << std::endl
334 << " rt_runtime: " << cpu_cgroup.rt_runtime << std::endl
335 << " rt_period: " << cpu_cgroup.rt_period << std::endl;
336
337 return stream;
338}
339
340std::ostream& operator<<(std::ostream& stream, const Rlimit& rlimit) {
341 stream << "rlimit:" << std::endl
342 << " type: " << rlimit.type << std::endl
343 << " cur: " << rlimit.cur << std::endl
344 << " max: " << rlimit.max << std::endl;
345
346 return stream;
347}
348
yusukes32622542018-01-05 18:59:52 -0800349void DumpConfig(std::ostream* stream,
350 const container_config* c,
351 bool sort_vectors) {
352 *stream << "config_root: " << QUOTE(c->config_root.value()) << std::endl
353 << "rootfs: " << QUOTE(c->rootfs.value()) << std::endl
354 << "rootfs_mount_flags: 0x" << std::hex << c->rootfs_mount_flags
yusukes4d955472018-01-17 16:41:32 -0800355 << std::dec << " (" << GetMountFlagsAsString(c->rootfs_mount_flags)
356 << ")" << std::endl
yusukes32622542018-01-05 18:59:52 -0800357 << "premounted_runfs: " << QUOTE(c->premounted_runfs.value())
358 << std::endl
359 << "pid_file_path: " << QUOTE(c->pid_file_path.value()) << std::endl
360 << "program_argv: size=" << c->program_argv.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800361
362 for (const std::string& argv : c->program_argv)
yusukes32622542018-01-05 18:59:52 -0800363 *stream << " " << QUOTE(argv) << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800364
yusukes32622542018-01-05 18:59:52 -0800365 *stream << "uid: " << c->uid << std::endl
366 << "uid_map: " << QUOTE(c->uid_map) << std::endl
367 << "gid: " << c->gid << std::endl
368 << "gid_map: " << QUOTE(c->gid_map) << std::endl
369 << "alt_syscall_table: " << QUOTE(c->alt_syscall_table) << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800370
yusukes32622542018-01-05 18:59:52 -0800371 auto mount_sorted = c->mounts;
372 if (sort_vectors) {
373 std::stable_sort(mount_sorted.begin(), mount_sorted.end(),
374 [](const Mount& lhs, const Mount& rhs) {
375 return std::make_tuple(lhs.destination.value(),
376 lhs.source.value(), lhs.flags) <
377 std::make_tuple(rhs.destination.value(),
378 rhs.source.value(), rhs.flags);
379 });
380 }
381 for (const auto& mount : mount_sorted)
382 *stream << mount;
yusukesbbc37a72017-11-21 09:51:54 -0800383
yusukes32622542018-01-05 18:59:52 -0800384 *stream << "namespaces: size=" << c->namespaces.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800385 for (const std::string& ns : c->namespaces)
yusukes32622542018-01-05 18:59:52 -0800386 *stream << " " << QUOTE(ns) << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800387
yusukes32622542018-01-05 18:59:52 -0800388 auto devices_sorted = c->devices;
389 if (sort_vectors) {
390 std::stable_sort(devices_sorted.begin(), devices_sorted.end(),
391 [](const Device& lhs, const Device& rhs) {
392 return lhs.path.value() < rhs.path.value();
393 });
394 }
395 for (const auto& device : devices_sorted)
396 *stream << device;
yusukesbbc37a72017-11-21 09:51:54 -0800397
yusukes32622542018-01-05 18:59:52 -0800398 auto cgroup_devices_sorted = c->cgroup_devices;
399 if (sort_vectors) {
400 std::stable_sort(cgroup_devices_sorted.begin(), cgroup_devices_sorted.end(),
401 [](const CgroupDevice& lhs, const CgroupDevice& rhs) {
402 return std::make_tuple(lhs.type, lhs.major, lhs.minor) <
403 std::make_tuple(rhs.type, rhs.major, rhs.minor);
404 });
405 }
406 for (const auto& cgroup_device : cgroup_devices_sorted)
407 *stream << cgroup_device;
yusukesbbc37a72017-11-21 09:51:54 -0800408
yusukese67af442017-12-23 00:52:15 -0800409 *stream << c->cpu_cgparams
yusukes32622542018-01-05 18:59:52 -0800410 << "cgroup_parent: " << QUOTE(c->cgroup_parent.value()) << std::endl
411 << "cgroup_owner: " << c->cgroup_owner << std::endl
412 << "cgroup_group: " << c->cgroup_group << std::endl
413 << "keep_fds_open: " << c->keep_fds_open << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800414
yusukes32622542018-01-05 18:59:52 -0800415 *stream << "num_rlimits: " << c->num_rlimits << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800416 for (size_t i = 0; i < c->num_rlimits; ++i)
yusukes32622542018-01-05 18:59:52 -0800417 *stream << c->rlimits[i];
yusukesbbc37a72017-11-21 09:51:54 -0800418
yusukes32622542018-01-05 18:59:52 -0800419 *stream << "use_capmask: " << c->use_capmask << std::endl
420 << "use_capmask_ambient: " << c->use_capmask_ambient << std::endl
421 << "capmask: 0x" << std::hex << c->capmask << std::dec << std::endl
422 << "securebits_skip_mask: 0x" << std::hex << c->securebits_skip_mask
423 << std::dec << std::endl
424 << "do_init: " << c->do_init << std::endl
425 << "selinux_context: " << QUOTE(c->selinux_context) << std::endl
426 << "pre_start_hook: " << reinterpret_cast<void*>(c->pre_start_hook)
427 << std::endl
428 << "pre_start_hook_payload: " << c->pre_start_hook_payload
429 << std::endl
430 << "inherited_fds: size=" << c->inherited_fds.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800431
432 for (int fd : c->inherited_fds)
yusukes32622542018-01-05 18:59:52 -0800433 *stream << " " << fd << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800434
yusukes32622542018-01-05 18:59:52 -0800435 *stream << "hooks: size=" << c->hooks.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800436}
437
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700438// Returns the path for |path_in_container| in the outer namespace.
439base::FilePath GetPathInOuterNamespace(
440 const base::FilePath& root, const base::FilePath& path_in_container) {
441 if (path_in_container.IsAbsolute())
442 return base::FilePath(root.value() + path_in_container.value());
443 return root.Append(path_in_container);
444}
445
446// Make sure the mount target exists in the new rootfs. Create if needed and
447// possible.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700448bool SetupMountDestination(const struct container_config* config,
449 const Mount& mount,
450 const base::FilePath& source,
451 const base::FilePath& dest) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700452 struct stat st_buf;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700453 if (stat(dest.value().c_str(), &st_buf) == 0) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700454 // destination exists.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700455 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700456 }
457
458 // Try to create the destination. Either make directory or touch a file
459 // depending on the source type.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700460 int uid_userns;
461 if (!GetUsernsOutsideId(config->uid_map, mount.uid, &uid_userns))
462 return false;
463 int gid_userns;
464 if (!GetUsernsOutsideId(config->gid_map, mount.gid, &gid_userns))
465 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700466
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700467 if (stat(source.value().c_str(), &st_buf) != 0 || S_ISDIR(st_buf.st_mode) ||
468 S_ISBLK(st_buf.st_mode)) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700469 return MakeDir(dest, uid_userns, gid_userns, mount.mode);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700470 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700471
472 return TouchFile(dest, uid_userns, gid_userns, mount.mode);
473}
474
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700475// Unmounts anything we mounted in this mount namespace in the opposite order
476// that they were mounted.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700477bool UnmountExternalMounts(struct container* c) {
478 bool ret = true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700479
480 for (auto it = c->ext_mounts.rbegin(); it != c->ext_mounts.rend(); ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700481 if (umount(it->value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700482 PLOG(ERROR) << "Failed to unmount " << it->value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700483 ret = false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700484 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700485 }
486 c->ext_mounts.clear();
487
488 for (auto it = c->loopdev_paths.rbegin(); it != c->loopdev_paths.rend();
489 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700490 if (!LoopdevDetach(*it))
491 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700492 }
493 c->loopdev_paths.clear();
494
495 for (auto it = c->device_mappers.rbegin(); it != c->device_mappers.rend();
496 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700497 if (!DeviceMapperDetach(*it))
498 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700499 }
500 c->device_mappers.clear();
501
502 return ret;
503}
504
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700505bool DoContainerMount(struct container* c,
506 const struct container_config* config,
507 const Mount& mount) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700508 base::FilePath dest =
509 GetPathInOuterNamespace(c->runfsroot, mount.destination);
510
511 // If it's a bind mount relative to rootfs, append source to
512 // rootfs path, otherwise source path is absolute.
513 base::FilePath source;
514 if ((mount.flags & MS_BIND) && !mount.source.IsAbsolute()) {
515 source = GetPathInOuterNamespace(c->runfsroot, mount.source);
516 } else if (mount.loopback && !mount.source.IsAbsolute() &&
517 !c->config_root.empty()) {
518 source = GetPathInOuterNamespace(c->config_root, mount.source);
519 } else {
520 source = mount.source;
521 }
522
523 // Only create the destinations for external mounts, minijail will take
524 // care of those mounted in the new namespace.
525 if (mount.create && !mount.mount_in_ns) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700526 if (!SetupMountDestination(config, mount, source, dest))
527 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700528 }
529 if (mount.loopback) {
530 // Record this loopback file for cleanup later.
531 base::FilePath loop_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700532 if (!LoopdevSetup(loop_source, &source))
533 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700534
535 // Save this to cleanup when shutting down.
536 c->loopdev_paths.push_back(source);
537 }
538 if (!mount.verity.empty()) {
539 // Set this device up via dm-verity.
540 std::string dm_name;
541 base::FilePath dm_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700542 if (!DeviceMapperSetup(dm_source, mount.verity, &source, &dm_name))
543 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700544
545 // Save this to cleanup when shutting down.
546 c->device_mappers.push_back(dm_name);
547 }
548 if (mount.mount_in_ns) {
549 // We can mount this with minijail.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700550 if (minijail_mount_with_data(
551 c->jail.get(), source.value().c_str(),
552 mount.destination.value().c_str(), mount.type.c_str(), mount.flags,
553 mount.data.empty() ? nullptr : mount.data.c_str()) != 0) {
554 return false;
555 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700556 } else {
557 // Mount this externally and unmount it on exit.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700558 if (!MountExternal(source.value(), dest.value(), mount.type, mount.flags,
559 mount.data)) {
560 return false;
561 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700562 // Save this to unmount when shutting down.
563 c->ext_mounts.push_back(dest);
564 }
565
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700566 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700567}
568
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700569bool DoContainerMounts(struct container* c,
570 const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700571 UnmountExternalMounts(c);
572
573 // This will run in all the error cases.
574 base::ScopedClosureRunner teardown(base::Bind(
575 base::IgnoreResult(&UnmountExternalMounts), base::Unretained(c)));
576
577 for (const auto& mount : config->mounts) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700578 if (!DoContainerMount(c, config, mount))
579 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700580 }
581
582 // The mounts have been done successfully, no need to tear them down anymore.
583 ignore_result(teardown.Release());
584
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700585 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700586}
587
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700588bool ContainerCreateDevice(const struct container* c,
589 const struct container_config* config,
590 const Device& dev,
Stephen Barber7bae6642017-11-30 10:47:12 -0800591 int major,
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700592 int minor) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700593 mode_t mode = dev.fs_permissions;
594 switch (dev.type) {
595 case 'b':
596 mode |= S_IFBLK;
597 break;
598 case 'c':
599 mode |= S_IFCHR;
600 break;
601 default:
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700602 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700603 }
604
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700605 int uid_userns;
606 if (!GetUsernsOutsideId(config->uid_map, dev.uid, &uid_userns))
607 return false;
608 int gid_userns;
609 if (!GetUsernsOutsideId(config->gid_map, dev.gid, &gid_userns))
610 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700611
612 base::FilePath path = GetPathInOuterNamespace(c->runfsroot, dev.path);
Luis Hector Chavez92278e82017-10-16 11:30:27 -0700613 if (!libcontainer::CreateDirectoryOwnedBy(path.DirName(), 0755, uid_userns,
614 gid_userns)) {
Luis Hector Chavez5d51abb2017-10-11 17:05:57 -0700615 PLOG(ERROR) << "Failed to create parent directory for " << path.value();
616 return false;
617 }
Stephen Barber7bae6642017-11-30 10:47:12 -0800618 if (mknod(path.value().c_str(), mode, makedev(major, minor)) != 0 &&
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700619 errno != EEXIST) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700620 PLOG(ERROR) << "Failed to mknod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700621 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700622 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700623 if (chown(path.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700624 PLOG(ERROR) << "Failed to chown " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700625 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700626 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700627 if (chmod(path.value().c_str(), dev.fs_permissions) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700628 PLOG(ERROR) << "Failed to chmod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700629 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700630 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700631
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700632 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700633}
634
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700635bool MountRunfs(struct container* c, const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700636 {
637 std::string runfs_template = base::StringPrintf(
638 "%s/%s_XXXXXX", c->rundir.value().c_str(), c->name.c_str());
639 // TODO(lhchavez): Replace this with base::CreateTemporaryDirInDir().
640 char* runfs_path = mkdtemp(const_cast<char*>(runfs_template.c_str()));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700641 if (!runfs_path) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700642 PLOG(ERROR) << "Failed to mkdtemp in " << c->rundir.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700643 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700644 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700645 c->runfs = base::FilePath(runfs_path);
646 }
647
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700648 int uid_userns;
649 if (!GetUsernsOutsideId(config->uid_map, config->uid, &uid_userns))
650 return false;
651 int gid_userns;
652 if (!GetUsernsOutsideId(config->gid_map, config->gid, &gid_userns))
653 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700654
655 // Make sure the container uid can access the rootfs.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700656 if (chmod(c->runfs.value().c_str(), 0700) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700657 PLOG(ERROR) << "Failed to chmod " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700658 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700659 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700660 if (chown(c->runfs.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700661 PLOG(ERROR) << "Failed to chown " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700662 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700663 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700664
665 c->runfsroot = c->runfs.Append("root");
666
667 constexpr mode_t kRootDirMode = 0660;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700668 if (mkdir(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700669 PLOG(ERROR) << "Failed to mkdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700670 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700671 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700672 if (chmod(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700673 PLOG(ERROR) << "Failed to chmod " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700674 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700675 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700676
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700677 if (mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
678 MS_BIND | (config->rootfs_mount_flags & MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700679 PLOG(ERROR) << "Failed to bind-mount " << config->rootfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700680 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700681 }
682
683 // MS_BIND ignores any flags passed to it (except MS_REC). We need a
684 // second call to mount() to actually set them.
685 if (config->rootfs_mount_flags &&
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700686 mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
687 (config->rootfs_mount_flags & ~MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700688 PLOG(ERROR) << "Failed to remount " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700689 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700690 }
691
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700692 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700693}
694
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700695bool CreateDeviceNodes(struct container* c,
696 const struct container_config* config,
697 pid_t container_pid) {
698 for (const auto& dev : config->devices) {
Stephen Barber7bae6642017-11-30 10:47:12 -0800699 int major = dev.major;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700700 int minor = dev.minor;
701
Stephen Barber7bae6642017-11-30 10:47:12 -0800702 if (dev.copy_major || dev.copy_minor) {
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700703 struct stat st_buff;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700704 if (stat(dev.path.value().c_str(), &st_buff) != 0)
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700705 continue;
Stephen Barber7bae6642017-11-30 10:47:12 -0800706
707 if (dev.copy_major)
708 major = major(st_buff.st_rdev);
709 if (dev.copy_minor)
710 minor = minor(st_buff.st_rdev);
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700711 }
Stephen Barber7bae6642017-11-30 10:47:12 -0800712 if (major < 0 || minor < 0)
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700713 continue;
Stephen Barber7bae6642017-11-30 10:47:12 -0800714 if (!ContainerCreateDevice(c, config, dev, major, minor))
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700715 return false;
716 }
717
718 return true;
719}
720
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700721bool DeviceSetup(struct container* c, const struct container_config* config) {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700722 c->cgroup->DenyAllDevices();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700723
724 for (const auto& dev : config->cgroup_devices) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700725 if (!c->cgroup->AddDevice(dev.allow, dev.major, dev.minor, dev.read,
726 dev.write, dev.modify, dev.type)) {
727 return false;
728 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700729 }
730
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700731 for (const auto& loopdev_path : c->loopdev_paths) {
732 struct stat st;
733
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700734 if (stat(loopdev_path.value().c_str(), &st) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700735 PLOG(ERROR) << "Failed to stat " << loopdev_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700736 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700737 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700738 if (!c->cgroup->AddDevice(1, major(st.st_rdev), minor(st.st_rdev), 1, 0, 0,
739 'b')) {
740 return false;
741 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700742 }
743
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700744 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700745}
746
747int Setexeccon(void* payload) {
748 char* init_domain = reinterpret_cast<char*>(payload);
749 pid_t tid = syscall(SYS_gettid);
750
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700751 if (tid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700752 PLOG(ERROR) << "Failed to gettid";
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700753 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700754 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700755
756 std::string exec_path =
757 base::StringPrintf("/proc/self/task/%d/attr/exec", tid);
758
759 base::ScopedFD fd(open(exec_path.c_str(), O_WRONLY | O_CLOEXEC));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700760 if (!fd.is_valid()) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700761 PLOG(ERROR) << "Failed to open " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700762 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700763 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700764
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700765 if (!base::WriteFileDescriptor(fd.get(), init_domain, strlen(init_domain))) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700766 PLOG(ERROR) << "Failed to write the SELinux label to " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700767 return -errno;
768 }
769
770 return 0;
771}
772
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700773bool ContainerTeardown(struct container* c) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700774 UnmountExternalMounts(c);
775 if (!c->runfsroot.empty() && !c->runfs.empty()) {
776 /* |c->runfsroot| may have been mounted recursively. Thus use
777 * MNT_DETACH to "immediately disconnect the filesystem and all
778 * filesystems mounted below it from each other and from the
779 * mount table". Otherwise one would need to unmount every
780 * single dependent mount before unmounting |c->runfsroot|
781 * itself.
782 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700783 if (umount2(c->runfsroot.value().c_str(), MNT_DETACH) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700784 PLOG(ERROR) << "Failed to detach " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700785 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700786 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700787 if (rmdir(c->runfsroot.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700788 PLOG(ERROR) << "Failed to rmdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700789 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700790 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700791 c->runfsroot = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700792 }
793 if (!c->pid_file_path.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700794 if (unlink(c->pid_file_path.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700795 PLOG(ERROR) << "Failed to unlink " << c->pid_file_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700796 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700797 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700798 c->pid_file_path = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700799 }
800 if (!c->runfs.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700801 if (rmdir(c->runfs.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700802 PLOG(ERROR) << "Failed to rmdir " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700803 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700804 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700805 c->runfs = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700806 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700807 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700808}
809
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700810void CancelContainerStart(struct container* c) {
811 if (c->init_pid != -1)
812 container_kill(c);
813 ContainerTeardown(c);
814}
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700815
816} // namespace
817
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700818struct container_config* container_config_create() {
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700819 return new (std::nothrow) struct container_config();
Dylan Reid837c74a2016-01-22 17:25:21 -0800820}
821
// Frees a config obtained from container_config_create(). Passing nullptr is
// allowed and does nothing, since deleting a null pointer is a no-op.
void container_config_destroy(struct container_config* c) {
  delete c;
}
827
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700828int container_config_config_root(struct container_config* c,
829 const char* config_root) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700830 c->config_root = base::FilePath(config_root);
831 return 0;
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500832}
833
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700834const char* container_config_get_config_root(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700835 return c->config_root.value().c_str();
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500836}
837
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700838int container_config_rootfs(struct container_config* c, const char* rootfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700839 c->rootfs = base::FilePath(rootfs);
840 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800841}
842
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700843const char* container_config_get_rootfs(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700844 return c->rootfs.value().c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700845}
846
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700847void container_config_rootfs_mount_flags(struct container_config* c,
848 unsigned long rootfs_mount_flags) {
849 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
850 * simply check against zero later. MS_BIND is also added to avoid
851 * re-mounting the original filesystem, since the rootfs is always
852 * bind-mounted.
853 */
854 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700855}
856
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700857unsigned long container_config_get_rootfs_mount_flags(
858 const struct container_config* c) {
859 return c->rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700860}
861
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700862int container_config_premounted_runfs(struct container_config* c,
863 const char* runfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700864 c->premounted_runfs = base::FilePath(runfs);
865 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700866}
867
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700868const char* container_config_get_premounted_runfs(
869 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700870 return c->premounted_runfs.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700871}
872
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700873int container_config_pid_file(struct container_config* c, const char* path) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700874 c->pid_file_path = base::FilePath(path);
875 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700876}
877
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700878const char* container_config_get_pid_file(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700879 return c->pid_file_path.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700880}
881
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700882int container_config_program_argv(struct container_config* c,
883 const char** argv,
884 size_t num_args) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700885 if (num_args < 1) {
886 errno = EINVAL;
887 return -1;
888 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700889 c->program_argv.clear();
890 c->program_argv.reserve(num_args);
891 for (size_t i = 0; i < num_args; ++i)
892 c->program_argv.emplace_back(argv[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700893 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800894}
895
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700896size_t container_config_get_num_program_args(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700897 return c->program_argv.size();
Dylan Reid11456722016-05-02 11:24:50 -0700898}
899
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700900const char* container_config_get_program_arg(const struct container_config* c,
901 size_t index) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700902 if (index >= c->program_argv.size())
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700903 return nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700904 return c->program_argv[index].c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700905}
906
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700907void container_config_uid(struct container_config* c, uid_t uid) {
908 c->uid = uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700909}
910
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700911uid_t container_config_get_uid(const struct container_config* c) {
912 return c->uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700913}
914
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700915int container_config_uid_map(struct container_config* c, const char* uid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700916 c->uid_map = uid_map;
917 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800918}
919
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700920void container_config_gid(struct container_config* c, gid_t gid) {
921 c->gid = gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700922}
923
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700924gid_t container_config_get_gid(const struct container_config* c) {
925 return c->gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700926}
927
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700928int container_config_gid_map(struct container_config* c, const char* gid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700929 c->gid_map = gid_map;
930 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800931}
932
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700933int container_config_alt_syscall_table(struct container_config* c,
934 const char* alt_syscall_table) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700935 c->alt_syscall_table = alt_syscall_table;
936 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800937}
938
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700939int container_config_add_rlimit(struct container_config* c,
940 int type,
Luis Hector Chavezda352462018-01-30 09:10:00 -0800941 rlim_t cur,
942 rlim_t max) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700943 if (c->num_rlimits >= kMaxRlimits) {
944 errno = ENOMEM;
945 return -1;
946 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700947 c->rlimits[c->num_rlimits].type = type;
948 c->rlimits[c->num_rlimits].cur = cur;
949 c->rlimits[c->num_rlimits].max = max;
950 c->num_rlimits++;
951 return 0;
Dylan Reid93fa4602017-06-06 13:39:31 -0700952}
953
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700954int container_config_add_mount(struct container_config* c,
955 const char* name,
956 const char* source,
957 const char* destination,
958 const char* type,
959 const char* data,
960 const char* verity,
961 int flags,
962 int uid,
963 int gid,
964 int mode,
965 int mount_in_ns,
966 int create,
967 int loopback) {
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700968 if (name == nullptr || source == nullptr || destination == nullptr ||
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700969 type == nullptr) {
970 errno = EINVAL;
971 return -1;
972 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800973
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700974 c->mounts.emplace_back(Mount{name,
975 base::FilePath(source),
976 base::FilePath(destination),
977 type,
978 data ? data : "",
979 verity ? verity : "",
980 flags,
981 uid,
982 gid,
983 mode,
984 mount_in_ns != 0,
985 create != 0,
986 loopback != 0});
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700987
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700988 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800989}
990
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700991int container_config_add_cgroup_device(struct container_config* c,
992 int allow,
993 char type,
994 int major,
995 int minor,
996 int read,
997 int write,
998 int modify) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700999 c->cgroup_devices.emplace_back(CgroupDevice{
1000 allow != 0, type, major, minor, read != 0, write != 0, modify != 0});
Dylan Reid4843d6b2017-03-31 18:14:30 -07001001
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001002 return 0;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001003}
1004
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001005int container_config_add_device(struct container_config* c,
1006 char type,
1007 const char* path,
1008 int fs_permissions,
1009 int major,
1010 int minor,
Stephen Barber7bae6642017-11-30 10:47:12 -08001011 int copy_major,
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001012 int copy_minor,
1013 int uid,
1014 int gid,
1015 int read_allowed,
1016 int write_allowed,
1017 int modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001018 if (path == nullptr) {
1019 errno = EINVAL;
1020 return -1;
1021 }
Stephen Barber7bae6642017-11-30 10:47:12 -08001022 /* If using a dynamic major/minor number, ensure that major/minor is -1. */
1023 if ((copy_major && (major != -1)) || (copy_minor && (minor != -1))) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001024 errno = EINVAL;
1025 return -1;
1026 }
Dylan Reid355d5e42016-04-29 16:53:31 -07001027
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001028 if (read_allowed || write_allowed || modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001029 if (container_config_add_cgroup_device(c, 1, type, major, minor,
1030 read_allowed, write_allowed,
1031 modify_allowed) != 0) {
1032 errno = ENOMEM;
1033 return -1;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001034 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001035 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001036
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001037 c->devices.emplace_back(Device{
Stephen Barber7bae6642017-11-30 10:47:12 -08001038 type, base::FilePath(path), fs_permissions, major, minor, copy_major != 0,
1039 copy_minor != 0, uid, gid,
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001040 });
1041
1042 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001043}
1044
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001045int container_config_set_cpu_shares(struct container_config* c, int shares) {
1046 /* CPU shares must be 2 or higher. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001047 if (shares < 2) {
1048 errno = EINVAL;
1049 return -1;
1050 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001051
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001052 c->cpu_cgparams.shares = shares;
1053 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001054}
1055
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001056int container_config_set_cpu_cfs_params(struct container_config* c,
1057 int quota,
1058 int period) {
1059 /*
1060 * quota could be set higher than period to utilize more than one CPU.
1061 * quota could also be set as -1 to indicate the cgroup does not adhere
1062 * to any CPU time restrictions.
1063 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001064 if (quota <= 0 && quota != -1) {
1065 errno = EINVAL;
1066 return -1;
1067 }
1068 if (period <= 0) {
1069 errno = EINVAL;
1070 return -1;
1071 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001072
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001073 c->cpu_cgparams.quota = quota;
1074 c->cpu_cgparams.period = period;
1075 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001076}
1077
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001078int container_config_set_cpu_rt_params(struct container_config* c,
1079 int rt_runtime,
1080 int rt_period) {
1081 /*
1082 * rt_runtime could be set as 0 to prevent the cgroup from using
1083 * realtime CPU.
1084 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001085 if (rt_runtime < 0 || rt_runtime >= rt_period) {
1086 errno = EINVAL;
1087 return -1;
1088 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001089
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001090 c->cpu_cgparams.rt_runtime = rt_runtime;
1091 c->cpu_cgparams.rt_period = rt_period;
1092 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001093}
1094
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001095int container_config_get_cpu_shares(struct container_config* c) {
1096 return c->cpu_cgparams.shares;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001097}
1098
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001099int container_config_get_cpu_quota(struct container_config* c) {
1100 return c->cpu_cgparams.quota;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001101}
1102
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001103int container_config_get_cpu_period(struct container_config* c) {
1104 return c->cpu_cgparams.period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001105}
1106
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001107int container_config_get_cpu_rt_runtime(struct container_config* c) {
1108 return c->cpu_cgparams.rt_runtime;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001109}
1110
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001111int container_config_get_cpu_rt_period(struct container_config* c) {
1112 return c->cpu_cgparams.rt_period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001113}
1114
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001115int container_config_set_cgroup_parent(struct container_config* c,
1116 const char* parent,
1117 uid_t cgroup_owner,
1118 gid_t cgroup_group) {
1119 c->cgroup_owner = cgroup_owner;
1120 c->cgroup_group = cgroup_group;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001121 c->cgroup_parent = base::FilePath(parent);
1122 return 0;
Dylan Reid9e724af2016-07-21 09:58:07 -07001123}
1124
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001125const char* container_config_get_cgroup_parent(struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001126 return c->cgroup_parent.value().c_str();
Dylan Reid9e724af2016-07-21 09:58:07 -07001127}
1128
Stephen Barber771653f2017-10-04 23:48:57 -07001129int container_config_namespaces(struct container_config* c,
1130 const char** namespaces,
1131 size_t num_ns) {
1132 if (num_ns < 1)
1133 return -EINVAL;
1134 c->namespaces.clear();
1135 for (size_t i = 0; i < num_ns; ++i)
1136 c->namespaces.emplace(namespaces[i]);
1137 return 0;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001138}
1139
Stephen Barber771653f2017-10-04 23:48:57 -07001140size_t container_config_get_num_namespaces(const struct container_config* c) {
1141 return c->namespaces.size();
1142}
1143
1144bool container_config_has_namespace(const struct container_config* c,
1145 const char* ns) {
1146 return c->namespaces.find(ns) != c->namespaces.end();
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001147}
1148
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001149void container_config_keep_fds_open(struct container_config* c) {
yusukesf125f332017-12-08 13:45:15 -08001150 c->keep_fds_open = true;
Dylan Reidc4335842016-11-11 10:24:52 -08001151}
1152
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001153void container_config_set_capmask(struct container_config* c,
1154 uint64_t capmask,
1155 int ambient) {
yusukesf125f332017-12-08 13:45:15 -08001156 c->use_capmask = true;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001157 c->capmask = capmask;
1158 c->use_capmask_ambient = ambient;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001159}
1160
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001161void container_config_set_securebits_skip_mask(struct container_config* c,
1162 uint64_t securebits_skip_mask) {
1163 c->securebits_skip_mask = securebits_skip_mask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001164}
1165
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001166void container_config_set_run_as_init(struct container_config* c,
1167 int run_as_init) {
1168 c->do_init = !run_as_init;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001169}
1170
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001171int container_config_set_selinux_context(struct container_config* c,
1172 const char* context) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001173 if (!context) {
1174 errno = EINVAL;
1175 return -1;
1176 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001177 c->selinux_context = context;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001178 return 0;
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001179}
1180
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001181void container_config_set_pre_execve_hook(struct container_config* c,
1182 int (*hook)(void*),
1183 void* payload) {
1184 c->pre_start_hook = hook;
1185 c->pre_start_hook_payload = payload;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001186}
1187
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001188void container_config_add_hook(struct container_config* c,
1189 minijail_hook_event_t event,
1190 libcontainer::HookCallback callback) {
1191 auto it = c->hooks.insert(
1192 std::make_pair(event, std::vector<libcontainer::HookCallback>()));
1193 it.first->second.emplace_back(std::move(callback));
1194}
1195
Luis Hector Chaveze03926a2017-09-28 17:28:49 -07001196int container_config_add_hook(struct container_config* c,
1197 minijail_hook_event_t event,
1198 const char* filename,
1199 const char** argv,
1200 size_t num_args,
1201 int* pstdin_fd,
1202 int* pstdout_fd,
1203 int* pstderr_fd) {
1204 std::vector<std::string> args;
1205 args.reserve(num_args);
1206 for (size_t i = 0; i < num_args; ++i)
1207 args.emplace_back(argv[i]);
1208
1209 // First element of the array belongs to the parent and the second one belongs
1210 // to the child.
1211 base::ScopedFD stdin_fds[2], stdout_fds[2], stderr_fds[2];
1212 if (pstdin_fd) {
1213 if (!libcontainer::Pipe2(&stdin_fds[1], &stdin_fds[0], 0))
1214 return -1;
1215 }
1216 if (pstdout_fd) {
1217 if (!libcontainer::Pipe2(&stdout_fds[0], &stdout_fds[0], 0))
1218 return -1;
1219 }
1220 if (pstderr_fd) {
1221 if (!libcontainer::Pipe2(&stderr_fds[0], &stderr_fds[0], 0))
1222 return -1;
1223 }
1224
1225 // After this point the call has been successful, so we can now commit to
1226 // whatever pipes we have opened.
1227 if (pstdin_fd) {
1228 *pstdin_fd = stdin_fds[0].release();
1229 c->inherited_fds.emplace_back(stdin_fds[1].get());
1230 }
1231 if (pstdout_fd) {
1232 *pstdout_fd = stdout_fds[0].release();
1233 c->inherited_fds.emplace_back(stdout_fds[1].get());
1234 }
1235 if (pstderr_fd) {
1236 *pstderr_fd = stderr_fds[0].release();
1237 c->inherited_fds.emplace_back(stderr_fds[1].get());
1238 }
1239 container_config_add_hook(
1240 c, event,
1241 libcontainer::CreateExecveCallback(
1242 base::FilePath(filename), args, std::move(stdin_fds[1]),
1243 std::move(stdout_fds[1]), std::move(stderr_fds[1])));
1244 return 0;
1245}
1246
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001247int container_config_inherit_fds(struct container_config* c,
1248 int* inherited_fds,
1249 size_t inherited_fd_count) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001250 if (!c->inherited_fds.empty()) {
1251 errno = EINVAL;
1252 return -1;
1253 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001254 for (size_t i = 0; i < inherited_fd_count; ++i)
1255 c->inherited_fds.emplace_back(inherited_fds[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001256 return 0;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001257}
1258
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001259struct container* container_new(const char* name, const char* rundir) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001260 struct container* c = new (std::nothrow) container();
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001261 if (!c)
1262 return nullptr;
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001263 c->rundir = base::FilePath(rundir);
1264 c->name = name;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001265 return c;
Dylan Reid837c74a2016-01-22 17:25:21 -08001266}
1267
// Frees a container obtained from container_new(). Passing nullptr is
// harmless because deleting a null pointer is a no-op.
void container_destroy(struct container* c) {
  delete c;
}
1271
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001272int container_start(struct container* c,
1273 const struct container_config* config) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001274 if (!c) {
1275 errno = EINVAL;
1276 return -1;
1277 }
1278 if (!config) {
1279 errno = EINVAL;
1280 return -1;
1281 }
1282 if (config->program_argv.empty()) {
1283 errno = EINVAL;
1284 return -1;
1285 }
Dylan Reide040c6b2016-05-02 18:49:02 -07001286
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001287 // This will run in all the error cases.
1288 base::ScopedClosureRunner teardown(
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -07001289 base::Bind(&CancelContainerStart, base::Unretained(c)));
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001290
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001291 if (!config->config_root.empty())
1292 c->config_root = config->config_root;
1293 if (!config->premounted_runfs.empty()) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001294 c->runfs.clear();
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001295 c->runfsroot = config->premounted_runfs;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001296 } else {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001297 if (!MountRunfs(c, config))
1298 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001299 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001300
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001301 c->jail.reset(minijail_new());
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001302 if (!c->jail) {
1303 errno = ENOMEM;
1304 return -1;
1305 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001306
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001307 if (!DoContainerMounts(c, config))
1308 return -1;
Dylan Reid837c74a2016-01-22 17:25:21 -08001309
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001310 int cgroup_uid;
1311 if (!GetUsernsOutsideId(config->uid_map, config->cgroup_owner, &cgroup_uid))
1312 return -1;
1313 int cgroup_gid;
1314 if (!GetUsernsOutsideId(config->gid_map, config->cgroup_group, &cgroup_gid))
1315 return -1;
Stephen Barber1a398c72017-01-23 12:39:44 -08001316
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001317 c->cgroup = libcontainer::Cgroup::Create(c->name,
1318 base::FilePath("/sys/fs/cgroup"),
1319 config->cgroup_parent,
1320 cgroup_uid,
1321 cgroup_gid);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001322 if (!c->cgroup)
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001323 return -1;
Dylan Reida9966422016-07-21 10:11:34 -07001324
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001325 // Must be root to modify device cgroup or mknod.
1326 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
1327 hook_callbacks;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001328 if (getuid() == 0) {
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001329 if (!config->devices.empty()) {
1330 // Create the devices in the mount namespace.
1331 auto it = hook_callbacks.insert(
1332 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1333 std::vector<libcontainer::HookCallback>()));
1334 it.first->second.emplace_back(
1335 libcontainer::AdaptCallbackToRunInNamespaces(
1336 base::Bind(&CreateDeviceNodes, base::Unretained(c),
1337 base::Unretained(config)),
1338 {CLONE_NEWNS}));
1339 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001340 if (!DeviceSetup(c, config))
1341 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001342 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001343
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001344 /* Setup CPU cgroup params. */
1345 if (config->cpu_cgparams.shares) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001346 if (!c->cgroup->SetCpuShares(config->cpu_cgparams.shares))
1347 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001348 }
1349 if (config->cpu_cgparams.period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001350 if (!c->cgroup->SetCpuQuota(config->cpu_cgparams.quota))
1351 return -1;
1352 if (!c->cgroup->SetCpuPeriod(config->cpu_cgparams.period))
1353 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001354 }
1355 if (config->cpu_cgparams.rt_period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001356 if (!c->cgroup->SetCpuRtRuntime(config->cpu_cgparams.rt_runtime))
1357 return -1;
1358 if (!c->cgroup->SetCpuRtPeriod(config->cpu_cgparams.rt_period))
1359 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001360 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001361
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001362 /* Setup and start the container with libminijail. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001363 if (!config->pid_file_path.empty())
1364 c->pid_file_path = config->pid_file_path;
1365 else if (!c->runfs.empty())
1366 c->pid_file_path = c->runfs.Append("container.pid");
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001367
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001368 if (!c->pid_file_path.empty())
1369 minijail_write_pid_file(c->jail.get(), c->pid_file_path.value().c_str());
1370 minijail_reset_signal_mask(c->jail.get());
Luis Hector Chavezcd9a6b62018-04-04 08:39:44 -07001371 minijail_reset_signal_handlers(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001372
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001373 /* Setup container namespaces. */
Stephen Barber771653f2017-10-04 23:48:57 -07001374 if (container_config_has_namespace(config, "ipc"))
1375 minijail_namespace_ipc(c->jail.get());
1376 if (container_config_has_namespace(config, "mount"))
1377 minijail_namespace_vfs(c->jail.get());
1378 if (container_config_has_namespace(config, "network"))
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001379 minijail_namespace_net(c->jail.get());
Stephen Barber771653f2017-10-04 23:48:57 -07001380 if (container_config_has_namespace(config, "pid"))
1381 minijail_namespace_pids(c->jail.get());
1382
1383 if (container_config_has_namespace(config, "user")) {
1384 minijail_namespace_user(c->jail.get());
1385 if (minijail_uidmap(c->jail.get(), config->uid_map.c_str()) != 0)
1386 return -1;
1387 if (minijail_gidmap(c->jail.get(), config->gid_map.c_str()) != 0)
1388 return -1;
1389 }
1390
1391 if (container_config_has_namespace(config, "cgroup"))
1392 minijail_namespace_cgroups(c->jail.get());
1393
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001394 if (getuid() != 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001395 minijail_namespace_user_disable_setgroups(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001396
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001397 /* Set the UID/GID inside the container if not 0. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001398 if (!GetUsernsOutsideId(config->uid_map, config->uid, nullptr))
1399 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001400 else if (config->uid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001401 minijail_change_uid(c->jail.get(), config->uid);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001402 if (!GetUsernsOutsideId(config->gid_map, config->gid, nullptr))
1403 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001404 else if (config->gid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001405 minijail_change_gid(c->jail.get(), config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001406
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001407 if (minijail_enter_pivot_root(c->jail.get(), c->runfsroot.value().c_str()) !=
1408 0) {
1409 return -1;
1410 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001411
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001412 // Add the cgroups configured above.
1413 for (int32_t i = 0; i < libcontainer::Cgroup::Type::NUM_TYPES; i++) {
1414 if (c->cgroup->has_tasks_path(i)) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001415 if (minijail_add_to_cgroup(
1416 c->jail.get(), c->cgroup->tasks_path(i).value().c_str()) != 0) {
1417 return -1;
1418 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001419 }
1420 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001421
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001422 if (!config->alt_syscall_table.empty())
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001423 minijail_use_alt_syscall(c->jail.get(), config->alt_syscall_table.c_str());
Dylan Reid837c74a2016-01-22 17:25:21 -08001424
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001425 for (int i = 0; i < config->num_rlimits; i++) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001426 const Rlimit& lim = config->rlimits[i];
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001427 if (minijail_rlimit(c->jail.get(), lim.type, lim.cur, lim.max) != 0)
1428 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001429 }
Dylan Reid93fa4602017-06-06 13:39:31 -07001430
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001431 if (!config->selinux_context.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001432 if (minijail_add_hook(c->jail.get(), &Setexeccon,
1433 const_cast<char*>(config->selinux_context.c_str()),
1434 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1435 return -1;
1436 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001437 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001438
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001439 if (config->pre_start_hook) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001440 if (minijail_add_hook(c->jail.get(), config->pre_start_hook,
1441 config->pre_start_hook_payload,
1442 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1443 return -1;
1444 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001445 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001446
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001447 // Now that all pre-requisite hooks are installed, copy the ones in the
1448 // container_config object in the correct order.
1449 for (const auto& config_hook : config->hooks) {
1450 auto it = hook_callbacks.insert(std::make_pair(
1451 config_hook.first, std::vector<libcontainer::HookCallback>()));
1452 it.first->second.insert(it.first->second.end(), config_hook.second.begin(),
1453 config_hook.second.end());
1454 }
1455
1456 c->hook_states.clear();
1457 // Reserve enough memory to hold all the hooks, so that their addresses do not
1458 // get invalidated by reallocation.
1459 c->hook_states.reserve(MINIJAIL_HOOK_EVENT_MAX);
1460 for (minijail_hook_event_t event : {MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1461 MINIJAIL_HOOK_EVENT_PRE_DROP_CAPS,
1462 MINIJAIL_HOOK_EVENT_PRE_EXECVE}) {
1463 const auto& it = hook_callbacks.find(event);
1464 if (it == hook_callbacks.end())
1465 continue;
1466 c->hook_states.emplace_back(
1467 std::make_pair(libcontainer::HookState(), it->second));
1468 if (!c->hook_states.back().first.InstallHook(c->jail.get(), event))
1469 return -1;
1470 }
1471
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001472 for (int fd : config->inherited_fds) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001473 if (minijail_preserve_fd(c->jail.get(), fd, fd) != 0)
1474 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001475 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001476
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001477 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001478 minijail_skip_remount_private(c->jail.get());
Dylan Reid3da683b2016-04-05 03:35:35 -07001479
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001480 if (!config->keep_fds_open)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001481 minijail_close_open_fds(c->jail.get());
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001482
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001483 if (config->use_capmask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001484 minijail_use_caps(c->jail.get(), config->capmask);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001485 if (config->use_capmask_ambient)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001486 minijail_set_ambient_caps(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001487 if (config->securebits_skip_mask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001488 minijail_skip_setting_securebits(c->jail.get(),
1489 config->securebits_skip_mask);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001490 }
1491 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001492
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001493 if (!config->do_init)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001494 minijail_run_as_init(c->jail.get());
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001495
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001496 std::vector<char*> argv_cstr;
1497 argv_cstr.reserve(config->program_argv.size() + 1);
1498 for (const auto& arg : config->program_argv)
1499 argv_cstr.emplace_back(const_cast<char*>(arg.c_str()));
1500 argv_cstr.emplace_back(nullptr);
1501
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001502 if (minijail_run_pid_pipes_no_preload(c->jail.get(), argv_cstr[0],
1503 argv_cstr.data(), &c->init_pid, nullptr,
1504 nullptr, nullptr) != 0) {
1505 return -1;
1506 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001507
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001508 // |hook_states| is already sorted in the correct order.
1509 for (auto& hook_state : c->hook_states) {
1510 if (!hook_state.first.WaitForHookAndRun(hook_state.second, c->init_pid))
1511 return -1;
1512 }
1513
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001514 // The container has started successfully, no need to tear it down anymore.
1515 ignore_result(teardown.Release());
1516 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001517}
1518
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001519const char* container_root(struct container* c) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001520 return c->runfs.value().c_str();
Dylan Reid837c74a2016-01-22 17:25:21 -08001521}
1522
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001523int container_pid(struct container* c) {
1524 return c->init_pid;
Dylan Reid837c74a2016-01-22 17:25:21 -08001525}
1526
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001527int container_wait(struct container* c) {
1528 int rc;
Dylan Reidcf745c52016-04-22 10:18:03 -07001529
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001530 do {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001531 rc = minijail_wait(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001532 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001533
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001534 // If the process had already been reaped, still perform teardown.
1535 if (rc == -ECHILD || rc >= 0) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001536 if (!ContainerTeardown(c))
1537 rc = -errno;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001538 }
1539 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001540}
1541
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001542int container_kill(struct container* c) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001543 if (kill(c->init_pid, SIGKILL) != 0 && errno != ESRCH) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -07001544 PLOG(ERROR) << "Failed to kill " << c->init_pid;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001545 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -07001546 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001547 return container_wait(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001548}
yusukesbbc37a72017-11-21 09:51:54 -08001549
yusukes32622542018-01-05 18:59:52 -08001550char* container_config_dump(struct container_config* c, int sort_vectors) {
yusukesbbc37a72017-11-21 09:51:54 -08001551 std::stringstream out;
yusukes32622542018-01-05 18:59:52 -08001552 DumpConfig(&out, c, sort_vectors);
yusukesbbc37a72017-11-21 09:51:54 -08001553 return strdup(out.str().c_str());
1554}