blob: 148e3967aa0ce1cb3064f1b99360064b3207c1dc [file] [log] [blame]
// Copyright 2016 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
Dylan Reid837c74a2016-01-22 17:25:21 -08004
Dylan Reid837c74a2016-01-22 17:25:21 -08005#include <errno.h>
6#include <fcntl.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08007#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07008#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08009#include <stdlib.h>
10#include <string.h>
11#include <sys/mount.h>
12#include <sys/stat.h>
13#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070014#include <sys/wait.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070015#include <syscall.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <unistd.h>
17
yusukes32622542018-01-05 18:59:52 -080018#include <algorithm>
Luis Hector Chavez644d2042017-09-19 18:56:44 -070019#include <map>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070020#include <memory>
yusukesbbc37a72017-11-21 09:51:54 -080021#include <ostream>
Stephen Barber771653f2017-10-04 23:48:57 -070022#include <set>
yusukesbbc37a72017-11-21 09:51:54 -080023#include <sstream>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070024#include <string>
yusukes32622542018-01-05 18:59:52 -080025#include <tuple>
Luis Hector Chavez644d2042017-09-19 18:56:44 -070026#include <utility>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070027#include <vector>
28
29#include <base/bind.h>
30#include <base/bind_helpers.h>
31#include <base/callback_helpers.h>
32#include <base/files/file_path.h>
33#include <base/files/file_util.h>
34#include <base/files/scoped_file.h>
Luis Hector Chavez835d39e2017-09-19 15:16:31 -070035#include <base/logging.h>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070036#include <base/macros.h>
37#include <base/strings/string_util.h>
38#include <base/strings/stringprintf.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070039#include <libminijail.h>
Luis Hector Chavez626f5c82017-09-18 11:19:32 -070040#include <scoped_minijail.h>
Mike Frysinger412dbd22017-01-06 01:50:34 -050041
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -070042#include "libcontainer/cgroup.h"
Luis Hector Chavez644d2042017-09-19 18:56:44 -070043#include "libcontainer/config.h"
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070044#include "libcontainer/libcontainer.h"
Luis Hector Chavez81efb332017-09-18 14:01:29 -070045#include "libcontainer/libcontainer_util.h"
Yusuke Sato91f11f02016-12-02 16:15:13 -080046
// Expands to a std::string holding |s| wrapped in double quotes.
#define QUOTE(s) (std::string(1, '"') + std::string(s) + std::string(1, '"'))
48
Luis Hector Chavez5381d002017-09-16 12:54:24 -070049namespace {
50
Luis Hector Chavez81efb332017-09-18 14:01:29 -070051using libcontainer::DeviceMapperDetach;
52using libcontainer::DeviceMapperSetup;
53using libcontainer::GetUsernsOutsideId;
54using libcontainer::LoopdevDetach;
55using libcontainer::LoopdevSetup;
56using libcontainer::MakeDir;
57using libcontainer::MountExternal;
58using libcontainer::TouchFile;
Mike Frysinger412dbd22017-01-06 01:50:34 -050059
// Capacity of the argv array handed to the setfiles command.
constexpr size_t kMaxNumSetfilesArgs{128};
// Capacity of the rlimit table. Linux defines 15 at the time of writing.
constexpr size_t kMaxRlimits{32};
Luis Hector Chavez479b95f2016-06-06 08:01:05 -070062
Luis Hector Chavez5381d002017-09-16 12:54:24 -070063struct Mount {
64 std::string name;
65 base::FilePath source;
66 base::FilePath destination;
67 std::string type;
68 std::string data;
69 std::string verity;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070070 int flags;
71 int uid;
72 int gid;
73 int mode;
Luis Hector Chavez5381d002017-09-16 12:54:24 -070074
75 // True if mount should happen in new vfs ns.
76 bool mount_in_ns;
77
78 // True if target should be created if it doesn't exist.
79 bool create;
80
81 // True if target should be mounted via loopback.
82 bool loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -080083};
84
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070085struct Device {
86 // 'c' or 'b' for char or block
87 char type;
88 base::FilePath path;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070089 int fs_permissions;
90 int major;
91 int minor;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070092
Stephen Barber7bae6642017-11-30 10:47:12 -080093 // Copy the major from existing node, ignores |major|.
94 bool copy_major;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070095 // Copy the minor from existing node, ignores |minor|.
96 bool copy_minor;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070097 int uid;
98 int gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -070099};
100
// One rule for the devices cgroup.
struct CgroupDevice {
  // Whether the rule grants (true) or denies (false) access.
  bool allow;
  char type;

  // Device numbers the rule applies to; -1 in either field matches all.
  int major;
  int minor;

  // Kinds of access covered by the rule.
  bool read;
  bool write;
  bool modify;
};
113
// Parameters for the cpu cgroup of a container.
struct CpuCgroup {
  int shares;
  int quota;
  int period;
  // Real-time scheduling counterparts of the values above.
  int rt_runtime;
  int rt_period;
};
121
// One resource limit for the contained process.
struct Rlimit {
  // Which resource the limit applies to.
  int type;
  // Current and maximum values of the limit.
  rlim_t cur;
  rlim_t max;
};
127
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700128} // namespace
129
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700130// Structure that configures how the container is run.
Dylan Reid837c74a2016-01-22 17:25:21 -0800131struct container_config {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700132 // Path to the root of the container itself.
133 base::FilePath config_root;
134
135 // Path to the root of the container's filesystem.
136 base::FilePath rootfs;
137
138 // Flags that will be passed to mount() for the rootfs.
yusukesb7b9a042017-12-08 13:14:25 -0800139 unsigned long rootfs_mount_flags = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700140
141 // Path to where the container will be run.
142 base::FilePath premounted_runfs;
143
144 // Path to the file where the pid should be written.
145 base::FilePath pid_file_path;
146
147 // The program to run and args, e.g. "/sbin/init".
148 std::vector<std::string> program_argv;
149
150 // The uid the container will run as.
yusukesb7b9a042017-12-08 13:14:25 -0800151 uid_t uid = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700152
153 // Mapping of UIDs in the container, e.g. "0 100000 1024"
154 std::string uid_map;
155
156 // The gid the container will run as.
yusukesb7b9a042017-12-08 13:14:25 -0800157 gid_t gid = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700158
159 // Mapping of GIDs in the container, e.g. "0 100000 1024"
160 std::string gid_map;
161
162 // Syscall table to use or nullptr if none.
163 std::string alt_syscall_table;
164
165 // Filesystems to mount in the new namespace.
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700166 std::vector<Mount> mounts;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700167
Stephen Barber771653f2017-10-04 23:48:57 -0700168 // Namespaces that should be used for the container.
169 std::set<std::string> namespaces;
170
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700171 // Device nodes to create.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700172 std::vector<Device> devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700173
174 // Device node cgroup permissions.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700175 std::vector<CgroupDevice> cgroup_devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700176
177 // Should run setfiles on mounts to enable selinux.
178 std::string run_setfiles;
179
180 // CPU cgroup params.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700181 CpuCgroup cpu_cgparams;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700182
183 // Parent dir for cgroup creation
184 base::FilePath cgroup_parent;
185
186 // uid to own the created cgroups
yusukesb7b9a042017-12-08 13:14:25 -0800187 uid_t cgroup_owner = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700188
189 // gid to own the created cgroups
yusukesb7b9a042017-12-08 13:14:25 -0800190 gid_t cgroup_group = 0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700191
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700192 // Allow the child process to keep open FDs (for stdin/out/err).
yusukesf125f332017-12-08 13:45:15 -0800193 bool keep_fds_open = false;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700194
195 // Array of rlimits for the contained process.
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700196 Rlimit rlimits[kMaxRlimits];
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700197
198 // The number of elements in `rlimits`.
yusukesb7b9a042017-12-08 13:14:25 -0800199 int num_rlimits = 0;
yusukesf125f332017-12-08 13:45:15 -0800200 bool use_capmask = false;
201 bool use_capmask_ambient = false;
yusukesb7b9a042017-12-08 13:14:25 -0800202 uint64_t capmask = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700203
204 // The mask of securebits to skip when restricting caps.
yusukesb7b9a042017-12-08 13:14:25 -0800205 uint64_t securebits_skip_mask = 0x0;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700206
207 // Whether the container needs an extra process to be run as init.
yusukesf125f332017-12-08 13:45:15 -0800208 bool do_init = false;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700209
210 // The SELinux context name the container will run under.
211 std::string selinux_context;
212
213 // A function pointer to be called prior to calling execve(2).
yusukesb7b9a042017-12-08 13:14:25 -0800214 minijail_hook_t pre_start_hook = nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700215
216 // Parameter that will be passed to pre_start_hook().
yusukesb7b9a042017-12-08 13:14:25 -0800217 void* pre_start_hook_payload = nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700218
Luis Hector Chaveze03926a2017-09-28 17:28:49 -0700219 // A list of file descriptors to inherit.
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700220 std::vector<int> inherited_fds;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700221
222 // A list of hooks that will be called upon minijail reaching various states
223 // of execution.
224 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
225 hooks;
Dylan Reid837c74a2016-01-22 17:25:21 -0800226};
227
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700228// Container manipulation
229struct container {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700230 std::unique_ptr<libcontainer::Cgroup> cgroup;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700231 ScopedMinijail jail;
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700232 pid_t init_pid = -1;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700233 base::FilePath config_root;
234 base::FilePath runfs;
235 base::FilePath rundir;
236 base::FilePath runfsroot;
237 base::FilePath pid_file_path;
238
239 // Mounts made outside of the minijail.
240 std::vector<base::FilePath> ext_mounts;
241 std::vector<base::FilePath> loopdev_paths;
242 std::vector<std::string> device_mappers;
243 std::string name;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700244
245 std::vector<std::pair<libcontainer::HookState,
246 std::vector<libcontainer::HookCallback>>>
247 hook_states;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700248};
249
250namespace {
251
yusukes4d955472018-01-17 16:41:32 -0800252std::string GetMountFlagsAsString(int flags) {
253#define CHECK_MOUNT_FLAG(flag) \
254 do { \
255 if (flags & flag) \
256 result.push_back(#flag); \
257 } while (false)
258
259 std::vector<std::string> result;
260 CHECK_MOUNT_FLAG(MS_RDONLY);
261 CHECK_MOUNT_FLAG(MS_NOSUID);
262 CHECK_MOUNT_FLAG(MS_NODEV);
263 CHECK_MOUNT_FLAG(MS_NOEXEC);
264 CHECK_MOUNT_FLAG(MS_SYNCHRONOUS);
265 CHECK_MOUNT_FLAG(MS_REMOUNT);
266 CHECK_MOUNT_FLAG(MS_MANDLOCK);
267 CHECK_MOUNT_FLAG(MS_DIRSYNC);
268 CHECK_MOUNT_FLAG(MS_NOATIME);
269 CHECK_MOUNT_FLAG(MS_NODIRATIME);
270 CHECK_MOUNT_FLAG(MS_BIND);
271 CHECK_MOUNT_FLAG(MS_MOVE);
272 CHECK_MOUNT_FLAG(MS_REC);
273 CHECK_MOUNT_FLAG(MS_SILENT);
274 CHECK_MOUNT_FLAG(MS_POSIXACL);
275 CHECK_MOUNT_FLAG(MS_UNBINDABLE);
276 CHECK_MOUNT_FLAG(MS_PRIVATE);
277 CHECK_MOUNT_FLAG(MS_SLAVE);
278 CHECK_MOUNT_FLAG(MS_SHARED);
279 return result.empty() ? "no flags" : base::JoinString(result, " | ");
280
281#undef CHECK_MOUNT_FLAG
282}
283
yusukesbbc37a72017-11-21 09:51:54 -0800284std::ostream& operator<<(std::ostream& stream, const Mount& mount) {
285 stream << "mount:" << std::endl
286 << " name: " << QUOTE(mount.name) << std::endl
287 << " source: " << QUOTE(mount.source.value()) << std::endl
288 << " destination: " << QUOTE(mount.destination.value()) << std::endl
289 << " type: " << QUOTE(mount.type) << std::endl
290 << " data: " << QUOTE(mount.data) << std::endl
291 << " verity: " << QUOTE(mount.verity) << std::endl
yusukes4d955472018-01-17 16:41:32 -0800292 << " flags: 0x" << std::hex << mount.flags << std::dec << " ("
293 << GetMountFlagsAsString(mount.flags) << ")" << std::endl
yusukesbbc37a72017-11-21 09:51:54 -0800294 << " uid: " << mount.uid << std::endl
295 << " gid: " << mount.gid << std::endl
296 << " mode: 0" << std::oct << mount.mode << std::dec << std::endl
297 << " mount_in_ns: " << mount.mount_in_ns << std::endl
298 << " create: " << mount.create << std::endl
299 << " loopback: " << mount.loopback << std::endl;
300
301 return stream;
302}
303
304std::ostream& operator<<(std::ostream& stream, const Device& device) {
305 stream << "device:" << std::endl
306 << " type: " << device.type << std::endl
307 << " path: " << QUOTE(device.path.value()) << std::endl
308 << " fs_permissions: 0" << std::oct << device.fs_permissions
309 << std::dec << std::endl
310 << " major: " << device.major << std::endl
311 << " minor: " << device.minor << std::endl
312 << " copy_minor: " << device.copy_minor << std::endl
313 << " uid: " << device.uid << std::endl
314 << " gid: " << device.gid << std::endl;
315
316 return stream;
317}
318
319std::ostream& operator<<(std::ostream& stream,
320 const CgroupDevice& cgroup_device) {
321 stream << "cgroup_device:" << std::endl
322 << " allow: " << cgroup_device.allow << std::endl
323 << " type: " << cgroup_device.type << std::endl
324 << " major: " << cgroup_device.major << std::endl
325 << " minor: " << cgroup_device.minor << std::endl
326 << " read: " << cgroup_device.read << std::endl
327 << " write: " << cgroup_device.write << std::endl
328 << " modify: " << cgroup_device.modify << std::endl;
329
330 return stream;
331}
332
333std::ostream& operator<<(std::ostream& stream, const CpuCgroup& cpu_cgroup) {
334 stream << "cpu_cgroup:" << std::endl
335 << " shares: " << cpu_cgroup.shares << std::endl
336 << " quota: " << cpu_cgroup.quota << std::endl
337 << " period: " << cpu_cgroup.period << std::endl
338 << " rt_runtime: " << cpu_cgroup.rt_runtime << std::endl
339 << " rt_period: " << cpu_cgroup.rt_period << std::endl;
340
341 return stream;
342}
343
344std::ostream& operator<<(std::ostream& stream, const Rlimit& rlimit) {
345 stream << "rlimit:" << std::endl
346 << " type: " << rlimit.type << std::endl
347 << " cur: " << rlimit.cur << std::endl
348 << " max: " << rlimit.max << std::endl;
349
350 return stream;
351}
352
yusukes32622542018-01-05 18:59:52 -0800353void DumpConfig(std::ostream* stream,
354 const container_config* c,
355 bool sort_vectors) {
356 *stream << "config_root: " << QUOTE(c->config_root.value()) << std::endl
357 << "rootfs: " << QUOTE(c->rootfs.value()) << std::endl
358 << "rootfs_mount_flags: 0x" << std::hex << c->rootfs_mount_flags
yusukes4d955472018-01-17 16:41:32 -0800359 << std::dec << " (" << GetMountFlagsAsString(c->rootfs_mount_flags)
360 << ")" << std::endl
yusukes32622542018-01-05 18:59:52 -0800361 << "premounted_runfs: " << QUOTE(c->premounted_runfs.value())
362 << std::endl
363 << "pid_file_path: " << QUOTE(c->pid_file_path.value()) << std::endl
364 << "program_argv: size=" << c->program_argv.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800365
366 for (const std::string& argv : c->program_argv)
yusukes32622542018-01-05 18:59:52 -0800367 *stream << " " << QUOTE(argv) << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800368
yusukes32622542018-01-05 18:59:52 -0800369 *stream << "uid: " << c->uid << std::endl
370 << "uid_map: " << QUOTE(c->uid_map) << std::endl
371 << "gid: " << c->gid << std::endl
372 << "gid_map: " << QUOTE(c->gid_map) << std::endl
373 << "alt_syscall_table: " << QUOTE(c->alt_syscall_table) << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800374
yusukes32622542018-01-05 18:59:52 -0800375 auto mount_sorted = c->mounts;
376 if (sort_vectors) {
377 std::stable_sort(mount_sorted.begin(), mount_sorted.end(),
378 [](const Mount& lhs, const Mount& rhs) {
379 return std::make_tuple(lhs.destination.value(),
380 lhs.source.value(), lhs.flags) <
381 std::make_tuple(rhs.destination.value(),
382 rhs.source.value(), rhs.flags);
383 });
384 }
385 for (const auto& mount : mount_sorted)
386 *stream << mount;
yusukesbbc37a72017-11-21 09:51:54 -0800387
yusukes32622542018-01-05 18:59:52 -0800388 *stream << "namespaces: size=" << c->namespaces.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800389 for (const std::string& ns : c->namespaces)
yusukes32622542018-01-05 18:59:52 -0800390 *stream << " " << QUOTE(ns) << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800391
yusukes32622542018-01-05 18:59:52 -0800392 auto devices_sorted = c->devices;
393 if (sort_vectors) {
394 std::stable_sort(devices_sorted.begin(), devices_sorted.end(),
395 [](const Device& lhs, const Device& rhs) {
396 return lhs.path.value() < rhs.path.value();
397 });
398 }
399 for (const auto& device : devices_sorted)
400 *stream << device;
yusukesbbc37a72017-11-21 09:51:54 -0800401
yusukes32622542018-01-05 18:59:52 -0800402 auto cgroup_devices_sorted = c->cgroup_devices;
403 if (sort_vectors) {
404 std::stable_sort(cgroup_devices_sorted.begin(), cgroup_devices_sorted.end(),
405 [](const CgroupDevice& lhs, const CgroupDevice& rhs) {
406 return std::make_tuple(lhs.type, lhs.major, lhs.minor) <
407 std::make_tuple(rhs.type, rhs.major, rhs.minor);
408 });
409 }
410 for (const auto& cgroup_device : cgroup_devices_sorted)
411 *stream << cgroup_device;
yusukesbbc37a72017-11-21 09:51:54 -0800412
yusukes32622542018-01-05 18:59:52 -0800413 *stream << "run_setfiles: " << QUOTE(c->run_setfiles) << std::endl
414 << c->cpu_cgparams
415 << "cgroup_parent: " << QUOTE(c->cgroup_parent.value()) << std::endl
416 << "cgroup_owner: " << c->cgroup_owner << std::endl
417 << "cgroup_group: " << c->cgroup_group << std::endl
418 << "keep_fds_open: " << c->keep_fds_open << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800419
yusukes32622542018-01-05 18:59:52 -0800420 *stream << "num_rlimits: " << c->num_rlimits << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800421 for (size_t i = 0; i < c->num_rlimits; ++i)
yusukes32622542018-01-05 18:59:52 -0800422 *stream << c->rlimits[i];
yusukesbbc37a72017-11-21 09:51:54 -0800423
yusukes32622542018-01-05 18:59:52 -0800424 *stream << "use_capmask: " << c->use_capmask << std::endl
425 << "use_capmask_ambient: " << c->use_capmask_ambient << std::endl
426 << "capmask: 0x" << std::hex << c->capmask << std::dec << std::endl
427 << "securebits_skip_mask: 0x" << std::hex << c->securebits_skip_mask
428 << std::dec << std::endl
429 << "do_init: " << c->do_init << std::endl
430 << "selinux_context: " << QUOTE(c->selinux_context) << std::endl
431 << "pre_start_hook: " << reinterpret_cast<void*>(c->pre_start_hook)
432 << std::endl
433 << "pre_start_hook_payload: " << c->pre_start_hook_payload
434 << std::endl
435 << "inherited_fds: size=" << c->inherited_fds.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800436
437 for (int fd : c->inherited_fds)
yusukes32622542018-01-05 18:59:52 -0800438 *stream << " " << fd << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800439
yusukes32622542018-01-05 18:59:52 -0800440 *stream << "hooks: size=" << c->hooks.size() << std::endl;
yusukesbbc37a72017-11-21 09:51:54 -0800441}
442
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700443// Returns the path for |path_in_container| in the outer namespace.
444base::FilePath GetPathInOuterNamespace(
445 const base::FilePath& root, const base::FilePath& path_in_container) {
446 if (path_in_container.IsAbsolute())
447 return base::FilePath(root.value() + path_in_container.value());
448 return root.Append(path_in_container);
449}
450
451// Make sure the mount target exists in the new rootfs. Create if needed and
452// possible.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700453bool SetupMountDestination(const struct container_config* config,
454 const Mount& mount,
455 const base::FilePath& source,
456 const base::FilePath& dest) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700457 struct stat st_buf;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700458 if (stat(dest.value().c_str(), &st_buf) == 0) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700459 // destination exists.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700460 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700461 }
462
463 // Try to create the destination. Either make directory or touch a file
464 // depending on the source type.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700465 int uid_userns;
466 if (!GetUsernsOutsideId(config->uid_map, mount.uid, &uid_userns))
467 return false;
468 int gid_userns;
469 if (!GetUsernsOutsideId(config->gid_map, mount.gid, &gid_userns))
470 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700471
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700472 if (stat(source.value().c_str(), &st_buf) != 0 || S_ISDIR(st_buf.st_mode) ||
473 S_ISBLK(st_buf.st_mode)) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700474 return MakeDir(dest, uid_userns, gid_userns, mount.mode);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700475 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700476
477 return TouchFile(dest, uid_userns, gid_userns, mount.mode);
478}
479
480// Fork and exec the setfiles command to configure the selinux policy.
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700481bool RunSetfilesCommand(const struct container* c,
482 const struct container_config* config,
483 const std::vector<base::FilePath>& destinations,
484 pid_t container_pid) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700485 int pid = fork();
486 if (pid == 0) {
487 size_t arg_index = 0;
488 const char* argv[kMaxNumSetfilesArgs];
489 const char* env[] = {
490 nullptr,
491 };
492
493 base::FilePath context_path = c->runfsroot.Append("file_contexts");
494
495 argv[arg_index++] = config->run_setfiles.c_str();
496 argv[arg_index++] = "-r";
497 argv[arg_index++] = c->runfsroot.value().c_str();
498 argv[arg_index++] = context_path.value().c_str();
499 if (arg_index + destinations.size() >= kMaxNumSetfilesArgs)
500 _exit(-E2BIG);
501 for (const auto& destination : destinations)
502 argv[arg_index++] = destination.value().c_str();
503 argv[arg_index] = nullptr;
504
505 execve(
506 argv[0], const_cast<char* const*>(argv), const_cast<char* const*>(env));
507
508 /* Command failed to exec if execve returns. */
509 _exit(-errno);
510 }
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700511 if (pid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700512 PLOG(ERROR) << "Failed to fork to run setfiles";
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700513 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700514 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700515
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700516 int status;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700517 if (HANDLE_EINTR(waitpid(pid, &status, 0)) < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700518 PLOG(ERROR) << "Failed to wait for setfiles";
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700519 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700520 }
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700521 if (!WIFEXITED(status)) {
522 LOG(ERROR) << "setfiles did not terminate cleanly";
523 return false;
524 }
525 if (WEXITSTATUS(status) != 0) {
526 LOG(ERROR) << "setfiles exited with non-zero status: "
527 << WEXITSTATUS(status);
528 return false;
529 }
530 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700531}
532
533// Unmounts anything we mounted in this mount namespace in the opposite order
534// that they were mounted.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700535bool UnmountExternalMounts(struct container* c) {
536 bool ret = true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700537
538 for (auto it = c->ext_mounts.rbegin(); it != c->ext_mounts.rend(); ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700539 if (umount(it->value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700540 PLOG(ERROR) << "Failed to unmount " << it->value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700541 ret = false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700542 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700543 }
544 c->ext_mounts.clear();
545
546 for (auto it = c->loopdev_paths.rbegin(); it != c->loopdev_paths.rend();
547 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700548 if (!LoopdevDetach(*it))
549 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700550 }
551 c->loopdev_paths.clear();
552
553 for (auto it = c->device_mappers.rbegin(); it != c->device_mappers.rend();
554 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700555 if (!DeviceMapperDetach(*it))
556 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700557 }
558 c->device_mappers.clear();
559
560 return ret;
561}
562
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700563bool DoContainerMount(struct container* c,
564 const struct container_config* config,
565 const Mount& mount) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700566 base::FilePath dest =
567 GetPathInOuterNamespace(c->runfsroot, mount.destination);
568
569 // If it's a bind mount relative to rootfs, append source to
570 // rootfs path, otherwise source path is absolute.
571 base::FilePath source;
572 if ((mount.flags & MS_BIND) && !mount.source.IsAbsolute()) {
573 source = GetPathInOuterNamespace(c->runfsroot, mount.source);
574 } else if (mount.loopback && !mount.source.IsAbsolute() &&
575 !c->config_root.empty()) {
576 source = GetPathInOuterNamespace(c->config_root, mount.source);
577 } else {
578 source = mount.source;
579 }
580
581 // Only create the destinations for external mounts, minijail will take
582 // care of those mounted in the new namespace.
583 if (mount.create && !mount.mount_in_ns) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700584 if (!SetupMountDestination(config, mount, source, dest))
585 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700586 }
587 if (mount.loopback) {
588 // Record this loopback file for cleanup later.
589 base::FilePath loop_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700590 if (!LoopdevSetup(loop_source, &source))
591 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700592
593 // Save this to cleanup when shutting down.
594 c->loopdev_paths.push_back(source);
595 }
596 if (!mount.verity.empty()) {
597 // Set this device up via dm-verity.
598 std::string dm_name;
599 base::FilePath dm_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700600 if (!DeviceMapperSetup(dm_source, mount.verity, &source, &dm_name))
601 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700602
603 // Save this to cleanup when shutting down.
604 c->device_mappers.push_back(dm_name);
605 }
606 if (mount.mount_in_ns) {
607 // We can mount this with minijail.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700608 if (minijail_mount_with_data(
609 c->jail.get(), source.value().c_str(),
610 mount.destination.value().c_str(), mount.type.c_str(), mount.flags,
611 mount.data.empty() ? nullptr : mount.data.c_str()) != 0) {
612 return false;
613 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700614 } else {
615 // Mount this externally and unmount it on exit.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700616 if (!MountExternal(source.value(), dest.value(), mount.type, mount.flags,
617 mount.data)) {
618 return false;
619 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700620 // Save this to unmount when shutting down.
621 c->ext_mounts.push_back(dest);
622 }
623
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700624 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700625}
626
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700627bool DoContainerMounts(struct container* c,
628 const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700629 UnmountExternalMounts(c);
630
631 // This will run in all the error cases.
632 base::ScopedClosureRunner teardown(base::Bind(
633 base::IgnoreResult(&UnmountExternalMounts), base::Unretained(c)));
634
635 for (const auto& mount : config->mounts) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700636 if (!DoContainerMount(c, config, mount))
637 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700638 }
639
640 // The mounts have been done successfully, no need to tear them down anymore.
641 ignore_result(teardown.Release());
642
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700643 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700644}
645
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700646bool ContainerCreateDevice(const struct container* c,
647 const struct container_config* config,
648 const Device& dev,
Stephen Barber7bae6642017-11-30 10:47:12 -0800649 int major,
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700650 int minor) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700651 mode_t mode = dev.fs_permissions;
652 switch (dev.type) {
653 case 'b':
654 mode |= S_IFBLK;
655 break;
656 case 'c':
657 mode |= S_IFCHR;
658 break;
659 default:
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700660 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700661 }
662
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700663 int uid_userns;
664 if (!GetUsernsOutsideId(config->uid_map, dev.uid, &uid_userns))
665 return false;
666 int gid_userns;
667 if (!GetUsernsOutsideId(config->gid_map, dev.gid, &gid_userns))
668 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700669
670 base::FilePath path = GetPathInOuterNamespace(c->runfsroot, dev.path);
Luis Hector Chavez92278e82017-10-16 11:30:27 -0700671 if (!libcontainer::CreateDirectoryOwnedBy(path.DirName(), 0755, uid_userns,
672 gid_userns)) {
Luis Hector Chavez5d51abb2017-10-11 17:05:57 -0700673 PLOG(ERROR) << "Failed to create parent directory for " << path.value();
674 return false;
675 }
Stephen Barber7bae6642017-11-30 10:47:12 -0800676 if (mknod(path.value().c_str(), mode, makedev(major, minor)) != 0 &&
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700677 errno != EEXIST) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700678 PLOG(ERROR) << "Failed to mknod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700679 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700680 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700681 if (chown(path.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700682 PLOG(ERROR) << "Failed to chown " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700683 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700684 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700685 if (chmod(path.value().c_str(), dev.fs_permissions) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700686 PLOG(ERROR) << "Failed to chmod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700687 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700688 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700689
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700690 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700691}
692
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700693bool MountRunfs(struct container* c, const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700694 {
695 std::string runfs_template = base::StringPrintf(
696 "%s/%s_XXXXXX", c->rundir.value().c_str(), c->name.c_str());
697 // TODO(lhchavez): Replace this with base::CreateTemporaryDirInDir().
698 char* runfs_path = mkdtemp(const_cast<char*>(runfs_template.c_str()));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700699 if (!runfs_path) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700700 PLOG(ERROR) << "Failed to mkdtemp in " << c->rundir.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700701 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700702 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700703 c->runfs = base::FilePath(runfs_path);
704 }
705
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700706 int uid_userns;
707 if (!GetUsernsOutsideId(config->uid_map, config->uid, &uid_userns))
708 return false;
709 int gid_userns;
710 if (!GetUsernsOutsideId(config->gid_map, config->gid, &gid_userns))
711 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700712
713 // Make sure the container uid can access the rootfs.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700714 if (chmod(c->runfs.value().c_str(), 0700) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700715 PLOG(ERROR) << "Failed to chmod " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700716 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700717 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700718 if (chown(c->runfs.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700719 PLOG(ERROR) << "Failed to chown " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700720 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700721 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700722
723 c->runfsroot = c->runfs.Append("root");
724
725 constexpr mode_t kRootDirMode = 0660;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700726 if (mkdir(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700727 PLOG(ERROR) << "Failed to mkdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700728 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700729 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700730 if (chmod(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700731 PLOG(ERROR) << "Failed to chmod " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700732 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700733 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700734
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700735 if (mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
736 MS_BIND | (config->rootfs_mount_flags & MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700737 PLOG(ERROR) << "Failed to bind-mount " << config->rootfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700738 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700739 }
740
741 // MS_BIND ignores any flags passed to it (except MS_REC). We need a
742 // second call to mount() to actually set them.
743 if (config->rootfs_mount_flags &&
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700744 mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
745 (config->rootfs_mount_flags & ~MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700746 PLOG(ERROR) << "Failed to remount " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700747 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700748 }
749
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700750 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700751}
752
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700753bool CreateDeviceNodes(struct container* c,
754 const struct container_config* config,
755 pid_t container_pid) {
756 for (const auto& dev : config->devices) {
Stephen Barber7bae6642017-11-30 10:47:12 -0800757 int major = dev.major;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700758 int minor = dev.minor;
759
Stephen Barber7bae6642017-11-30 10:47:12 -0800760 if (dev.copy_major || dev.copy_minor) {
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700761 struct stat st_buff;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700762 if (stat(dev.path.value().c_str(), &st_buff) != 0)
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700763 continue;
Stephen Barber7bae6642017-11-30 10:47:12 -0800764
765 if (dev.copy_major)
766 major = major(st_buff.st_rdev);
767 if (dev.copy_minor)
768 minor = minor(st_buff.st_rdev);
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700769 }
Stephen Barber7bae6642017-11-30 10:47:12 -0800770 if (major < 0 || minor < 0)
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700771 continue;
Stephen Barber7bae6642017-11-30 10:47:12 -0800772 if (!ContainerCreateDevice(c, config, dev, major, minor))
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700773 return false;
774 }
775
776 return true;
777}
778
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700779bool DeviceSetup(struct container* c, const struct container_config* config) {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700780 c->cgroup->DenyAllDevices();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700781
782 for (const auto& dev : config->cgroup_devices) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700783 if (!c->cgroup->AddDevice(dev.allow, dev.major, dev.minor, dev.read,
784 dev.write, dev.modify, dev.type)) {
785 return false;
786 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700787 }
788
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700789 for (const auto& loopdev_path : c->loopdev_paths) {
790 struct stat st;
791
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700792 if (stat(loopdev_path.value().c_str(), &st) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700793 PLOG(ERROR) << "Failed to stat " << loopdev_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700794 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700795 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700796 if (!c->cgroup->AddDevice(1, major(st.st_rdev), minor(st.st_rdev), 1, 0, 0,
797 'b')) {
798 return false;
799 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700800 }
801
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700802 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700803}
804
805int Setexeccon(void* payload) {
806 char* init_domain = reinterpret_cast<char*>(payload);
807 pid_t tid = syscall(SYS_gettid);
808
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700809 if (tid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700810 PLOG(ERROR) << "Failed to gettid";
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700811 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700812 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700813
814 std::string exec_path =
815 base::StringPrintf("/proc/self/task/%d/attr/exec", tid);
816
817 base::ScopedFD fd(open(exec_path.c_str(), O_WRONLY | O_CLOEXEC));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700818 if (!fd.is_valid()) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700819 PLOG(ERROR) << "Failed to open " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700820 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700821 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700822
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700823 if (!base::WriteFileDescriptor(fd.get(), init_domain, strlen(init_domain))) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700824 PLOG(ERROR) << "Failed to write the SELinux label to " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700825 return -errno;
826 }
827
828 return 0;
829}
830
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700831bool ContainerTeardown(struct container* c) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700832 UnmountExternalMounts(c);
833 if (!c->runfsroot.empty() && !c->runfs.empty()) {
834 /* |c->runfsroot| may have been mounted recursively. Thus use
835 * MNT_DETACH to "immediately disconnect the filesystem and all
836 * filesystems mounted below it from each other and from the
837 * mount table". Otherwise one would need to unmount every
838 * single dependent mount before unmounting |c->runfsroot|
839 * itself.
840 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700841 if (umount2(c->runfsroot.value().c_str(), MNT_DETACH) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700842 PLOG(ERROR) << "Failed to detach " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700843 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700844 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700845 if (rmdir(c->runfsroot.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700846 PLOG(ERROR) << "Failed to rmdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700847 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700848 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700849 c->runfsroot = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700850 }
851 if (!c->pid_file_path.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700852 if (unlink(c->pid_file_path.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700853 PLOG(ERROR) << "Failed to unlink " << c->pid_file_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700854 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700855 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700856 c->pid_file_path = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700857 }
858 if (!c->runfs.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700859 if (rmdir(c->runfs.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700860 PLOG(ERROR) << "Failed to rmdir " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700861 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700862 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700863 c->runfs = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700864 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700865 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700866}
867
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700868void CancelContainerStart(struct container* c) {
869 if (c->init_pid != -1)
870 container_kill(c);
871 ContainerTeardown(c);
872}
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700873
874} // namespace
875
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700876struct container_config* container_config_create() {
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700877 return new (std::nothrow) struct container_config();
Dylan Reid837c74a2016-01-22 17:25:21 -0800878}
879
// Frees a config obtained from container_config_create(). Passing nullptr is
// a no-op (deleting a null pointer does nothing).
void container_config_destroy(struct container_config* c) {
  delete c;
}
885
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700886int container_config_config_root(struct container_config* c,
887 const char* config_root) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700888 c->config_root = base::FilePath(config_root);
889 return 0;
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500890}
891
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700892const char* container_config_get_config_root(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700893 return c->config_root.value().c_str();
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500894}
895
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700896int container_config_rootfs(struct container_config* c, const char* rootfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700897 c->rootfs = base::FilePath(rootfs);
898 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800899}
900
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700901const char* container_config_get_rootfs(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700902 return c->rootfs.value().c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700903}
904
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700905void container_config_rootfs_mount_flags(struct container_config* c,
906 unsigned long rootfs_mount_flags) {
907 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
908 * simply check against zero later. MS_BIND is also added to avoid
909 * re-mounting the original filesystem, since the rootfs is always
910 * bind-mounted.
911 */
912 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700913}
914
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700915unsigned long container_config_get_rootfs_mount_flags(
916 const struct container_config* c) {
917 return c->rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700918}
919
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700920int container_config_premounted_runfs(struct container_config* c,
921 const char* runfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700922 c->premounted_runfs = base::FilePath(runfs);
923 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700924}
925
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700926const char* container_config_get_premounted_runfs(
927 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700928 return c->premounted_runfs.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700929}
930
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700931int container_config_pid_file(struct container_config* c, const char* path) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700932 c->pid_file_path = base::FilePath(path);
933 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700934}
935
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700936const char* container_config_get_pid_file(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700937 return c->pid_file_path.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700938}
939
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700940int container_config_program_argv(struct container_config* c,
941 const char** argv,
942 size_t num_args) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700943 if (num_args < 1) {
944 errno = EINVAL;
945 return -1;
946 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700947 c->program_argv.clear();
948 c->program_argv.reserve(num_args);
949 for (size_t i = 0; i < num_args; ++i)
950 c->program_argv.emplace_back(argv[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700951 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800952}
953
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700954size_t container_config_get_num_program_args(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700955 return c->program_argv.size();
Dylan Reid11456722016-05-02 11:24:50 -0700956}
957
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700958const char* container_config_get_program_arg(const struct container_config* c,
959 size_t index) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700960 if (index >= c->program_argv.size())
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700961 return nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700962 return c->program_argv[index].c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700963}
964
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700965void container_config_uid(struct container_config* c, uid_t uid) {
966 c->uid = uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700967}
968
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700969uid_t container_config_get_uid(const struct container_config* c) {
970 return c->uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700971}
972
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700973int container_config_uid_map(struct container_config* c, const char* uid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700974 c->uid_map = uid_map;
975 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800976}
977
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700978void container_config_gid(struct container_config* c, gid_t gid) {
979 c->gid = gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700980}
981
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700982gid_t container_config_get_gid(const struct container_config* c) {
983 return c->gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700984}
985
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700986int container_config_gid_map(struct container_config* c, const char* gid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700987 c->gid_map = gid_map;
988 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800989}
990
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700991int container_config_alt_syscall_table(struct container_config* c,
992 const char* alt_syscall_table) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700993 c->alt_syscall_table = alt_syscall_table;
994 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800995}
996
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700997int container_config_add_rlimit(struct container_config* c,
998 int type,
Luis Hector Chavezda352462018-01-30 09:10:00 -0800999 rlim_t cur,
1000 rlim_t max) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001001 if (c->num_rlimits >= kMaxRlimits) {
1002 errno = ENOMEM;
1003 return -1;
1004 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001005 c->rlimits[c->num_rlimits].type = type;
1006 c->rlimits[c->num_rlimits].cur = cur;
1007 c->rlimits[c->num_rlimits].max = max;
1008 c->num_rlimits++;
1009 return 0;
Dylan Reid93fa4602017-06-06 13:39:31 -07001010}
1011
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001012int container_config_add_mount(struct container_config* c,
1013 const char* name,
1014 const char* source,
1015 const char* destination,
1016 const char* type,
1017 const char* data,
1018 const char* verity,
1019 int flags,
1020 int uid,
1021 int gid,
1022 int mode,
1023 int mount_in_ns,
1024 int create,
1025 int loopback) {
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001026 if (name == nullptr || source == nullptr || destination == nullptr ||
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001027 type == nullptr) {
1028 errno = EINVAL;
1029 return -1;
1030 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001031
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001032 c->mounts.emplace_back(Mount{name,
1033 base::FilePath(source),
1034 base::FilePath(destination),
1035 type,
1036 data ? data : "",
1037 verity ? verity : "",
1038 flags,
1039 uid,
1040 gid,
1041 mode,
1042 mount_in_ns != 0,
1043 create != 0,
1044 loopback != 0});
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001045
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001046 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001047}
1048
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001049int container_config_add_cgroup_device(struct container_config* c,
1050 int allow,
1051 char type,
1052 int major,
1053 int minor,
1054 int read,
1055 int write,
1056 int modify) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001057 c->cgroup_devices.emplace_back(CgroupDevice{
1058 allow != 0, type, major, minor, read != 0, write != 0, modify != 0});
Dylan Reid4843d6b2017-03-31 18:14:30 -07001059
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001060 return 0;
Dylan Reid4843d6b2017-03-31 18:14:30 -07001061}
1062
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001063int container_config_add_device(struct container_config* c,
1064 char type,
1065 const char* path,
1066 int fs_permissions,
1067 int major,
1068 int minor,
Stephen Barber7bae6642017-11-30 10:47:12 -08001069 int copy_major,
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001070 int copy_minor,
1071 int uid,
1072 int gid,
1073 int read_allowed,
1074 int write_allowed,
1075 int modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001076 if (path == nullptr) {
1077 errno = EINVAL;
1078 return -1;
1079 }
Stephen Barber7bae6642017-11-30 10:47:12 -08001080 /* If using a dynamic major/minor number, ensure that major/minor is -1. */
1081 if ((copy_major && (major != -1)) || (copy_minor && (minor != -1))) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001082 errno = EINVAL;
1083 return -1;
1084 }
Dylan Reid355d5e42016-04-29 16:53:31 -07001085
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001086 if (read_allowed || write_allowed || modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001087 if (container_config_add_cgroup_device(c, 1, type, major, minor,
1088 read_allowed, write_allowed,
1089 modify_allowed) != 0) {
1090 errno = ENOMEM;
1091 return -1;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001092 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001093 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -07001094
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001095 c->devices.emplace_back(Device{
Stephen Barber7bae6642017-11-30 10:47:12 -08001096 type, base::FilePath(path), fs_permissions, major, minor, copy_major != 0,
1097 copy_minor != 0, uid, gid,
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001098 });
1099
1100 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001101}
1102
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001103int container_config_run_setfiles(struct container_config* c,
1104 const char* setfiles_cmd) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001105 c->run_setfiles = setfiles_cmd;
1106 return 0;
Dylan Reid2bd9ea92016-04-07 20:57:47 -07001107}
Dylan Reid837c74a2016-01-22 17:25:21 -08001108
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001109const char* container_config_get_run_setfiles(
1110 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001111 return c->run_setfiles.c_str();
Dylan Reid11456722016-05-02 11:24:50 -07001112}
1113
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001114int container_config_set_cpu_shares(struct container_config* c, int shares) {
1115 /* CPU shares must be 2 or higher. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001116 if (shares < 2) {
1117 errno = EINVAL;
1118 return -1;
1119 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001120
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001121 c->cpu_cgparams.shares = shares;
1122 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001123}
1124
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001125int container_config_set_cpu_cfs_params(struct container_config* c,
1126 int quota,
1127 int period) {
1128 /*
1129 * quota could be set higher than period to utilize more than one CPU.
1130 * quota could also be set as -1 to indicate the cgroup does not adhere
1131 * to any CPU time restrictions.
1132 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001133 if (quota <= 0 && quota != -1) {
1134 errno = EINVAL;
1135 return -1;
1136 }
1137 if (period <= 0) {
1138 errno = EINVAL;
1139 return -1;
1140 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001141
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001142 c->cpu_cgparams.quota = quota;
1143 c->cpu_cgparams.period = period;
1144 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001145}
1146
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001147int container_config_set_cpu_rt_params(struct container_config* c,
1148 int rt_runtime,
1149 int rt_period) {
1150 /*
1151 * rt_runtime could be set as 0 to prevent the cgroup from using
1152 * realtime CPU.
1153 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001154 if (rt_runtime < 0 || rt_runtime >= rt_period) {
1155 errno = EINVAL;
1156 return -1;
1157 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001158
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001159 c->cpu_cgparams.rt_runtime = rt_runtime;
1160 c->cpu_cgparams.rt_period = rt_period;
1161 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +08001162}
1163
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001164int container_config_get_cpu_shares(struct container_config* c) {
1165 return c->cpu_cgparams.shares;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001166}
1167
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001168int container_config_get_cpu_quota(struct container_config* c) {
1169 return c->cpu_cgparams.quota;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001170}
1171
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001172int container_config_get_cpu_period(struct container_config* c) {
1173 return c->cpu_cgparams.period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001174}
1175
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001176int container_config_get_cpu_rt_runtime(struct container_config* c) {
1177 return c->cpu_cgparams.rt_runtime;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001178}
1179
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001180int container_config_get_cpu_rt_period(struct container_config* c) {
1181 return c->cpu_cgparams.rt_period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +08001182}
1183
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001184int container_config_set_cgroup_parent(struct container_config* c,
1185 const char* parent,
1186 uid_t cgroup_owner,
1187 gid_t cgroup_group) {
1188 c->cgroup_owner = cgroup_owner;
1189 c->cgroup_group = cgroup_group;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001190 c->cgroup_parent = base::FilePath(parent);
1191 return 0;
Dylan Reid9e724af2016-07-21 09:58:07 -07001192}
1193
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001194const char* container_config_get_cgroup_parent(struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001195 return c->cgroup_parent.value().c_str();
Dylan Reid9e724af2016-07-21 09:58:07 -07001196}
1197
Stephen Barber771653f2017-10-04 23:48:57 -07001198int container_config_namespaces(struct container_config* c,
1199 const char** namespaces,
1200 size_t num_ns) {
1201 if (num_ns < 1)
1202 return -EINVAL;
1203 c->namespaces.clear();
1204 for (size_t i = 0; i < num_ns; ++i)
1205 c->namespaces.emplace(namespaces[i]);
1206 return 0;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001207}
1208
Stephen Barber771653f2017-10-04 23:48:57 -07001209size_t container_config_get_num_namespaces(const struct container_config* c) {
1210 return c->namespaces.size();
1211}
1212
1213bool container_config_has_namespace(const struct container_config* c,
1214 const char* ns) {
1215 return c->namespaces.find(ns) != c->namespaces.end();
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001216}
1217
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001218void container_config_keep_fds_open(struct container_config* c) {
yusukesf125f332017-12-08 13:45:15 -08001219 c->keep_fds_open = true;
Dylan Reidc4335842016-11-11 10:24:52 -08001220}
1221
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001222void container_config_set_capmask(struct container_config* c,
1223 uint64_t capmask,
1224 int ambient) {
yusukesf125f332017-12-08 13:45:15 -08001225 c->use_capmask = true;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001226 c->capmask = capmask;
1227 c->use_capmask_ambient = ambient;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001228}
1229
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001230void container_config_set_securebits_skip_mask(struct container_config* c,
1231 uint64_t securebits_skip_mask) {
1232 c->securebits_skip_mask = securebits_skip_mask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001233}
1234
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001235void container_config_set_run_as_init(struct container_config* c,
1236 int run_as_init) {
1237 c->do_init = !run_as_init;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001238}
1239
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001240int container_config_set_selinux_context(struct container_config* c,
1241 const char* context) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001242 if (!context) {
1243 errno = EINVAL;
1244 return -1;
1245 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001246 c->selinux_context = context;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001247 return 0;
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001248}
1249
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001250void container_config_set_pre_execve_hook(struct container_config* c,
1251 int (*hook)(void*),
1252 void* payload) {
1253 c->pre_start_hook = hook;
1254 c->pre_start_hook_payload = payload;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001255}
1256
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001257void container_config_add_hook(struct container_config* c,
1258 minijail_hook_event_t event,
1259 libcontainer::HookCallback callback) {
1260 auto it = c->hooks.insert(
1261 std::make_pair(event, std::vector<libcontainer::HookCallback>()));
1262 it.first->second.emplace_back(std::move(callback));
1263}
1264
Luis Hector Chaveze03926a2017-09-28 17:28:49 -07001265int container_config_add_hook(struct container_config* c,
1266 minijail_hook_event_t event,
1267 const char* filename,
1268 const char** argv,
1269 size_t num_args,
1270 int* pstdin_fd,
1271 int* pstdout_fd,
1272 int* pstderr_fd) {
1273 std::vector<std::string> args;
1274 args.reserve(num_args);
1275 for (size_t i = 0; i < num_args; ++i)
1276 args.emplace_back(argv[i]);
1277
1278 // First element of the array belongs to the parent and the second one belongs
1279 // to the child.
1280 base::ScopedFD stdin_fds[2], stdout_fds[2], stderr_fds[2];
1281 if (pstdin_fd) {
1282 if (!libcontainer::Pipe2(&stdin_fds[1], &stdin_fds[0], 0))
1283 return -1;
1284 }
1285 if (pstdout_fd) {
1286 if (!libcontainer::Pipe2(&stdout_fds[0], &stdout_fds[0], 0))
1287 return -1;
1288 }
1289 if (pstderr_fd) {
1290 if (!libcontainer::Pipe2(&stderr_fds[0], &stderr_fds[0], 0))
1291 return -1;
1292 }
1293
1294 // After this point the call has been successful, so we can now commit to
1295 // whatever pipes we have opened.
1296 if (pstdin_fd) {
1297 *pstdin_fd = stdin_fds[0].release();
1298 c->inherited_fds.emplace_back(stdin_fds[1].get());
1299 }
1300 if (pstdout_fd) {
1301 *pstdout_fd = stdout_fds[0].release();
1302 c->inherited_fds.emplace_back(stdout_fds[1].get());
1303 }
1304 if (pstderr_fd) {
1305 *pstderr_fd = stderr_fds[0].release();
1306 c->inherited_fds.emplace_back(stderr_fds[1].get());
1307 }
1308 container_config_add_hook(
1309 c, event,
1310 libcontainer::CreateExecveCallback(
1311 base::FilePath(filename), args, std::move(stdin_fds[1]),
1312 std::move(stdout_fds[1]), std::move(stderr_fds[1])));
1313 return 0;
1314}
1315
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001316int container_config_inherit_fds(struct container_config* c,
1317 int* inherited_fds,
1318 size_t inherited_fd_count) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001319 if (!c->inherited_fds.empty()) {
1320 errno = EINVAL;
1321 return -1;
1322 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001323 for (size_t i = 0; i < inherited_fd_count; ++i)
1324 c->inherited_fds.emplace_back(inherited_fds[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001325 return 0;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001326}
1327
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001328struct container* container_new(const char* name, const char* rundir) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001329 struct container* c = new (std::nothrow) container();
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001330 if (!c)
1331 return nullptr;
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001332 c->rundir = base::FilePath(rundir);
1333 c->name = name;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001334 return c;
Dylan Reid837c74a2016-01-22 17:25:21 -08001335}
1336
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001337void container_destroy(struct container* c) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001338 delete c;
Dylan Reid837c74a2016-01-22 17:25:21 -08001339}
1340
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001341int container_start(struct container* c,
1342 const struct container_config* config) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001343 if (!c) {
1344 errno = EINVAL;
1345 return -1;
1346 }
1347 if (!config) {
1348 errno = EINVAL;
1349 return -1;
1350 }
1351 if (config->program_argv.empty()) {
1352 errno = EINVAL;
1353 return -1;
1354 }
Dylan Reide040c6b2016-05-02 18:49:02 -07001355
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001356 // This will run in all the error cases.
1357 base::ScopedClosureRunner teardown(
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -07001358 base::Bind(&CancelContainerStart, base::Unretained(c)));
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001359
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001360 if (!config->config_root.empty())
1361 c->config_root = config->config_root;
1362 if (!config->premounted_runfs.empty()) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001363 c->runfs.clear();
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001364 c->runfsroot = config->premounted_runfs;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001365 } else {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001366 if (!MountRunfs(c, config))
1367 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001368 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001369
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001370 c->jail.reset(minijail_new());
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001371 if (!c->jail) {
1372 errno = ENOMEM;
1373 return -1;
1374 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001375
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001376 if (!DoContainerMounts(c, config))
1377 return -1;
Dylan Reid837c74a2016-01-22 17:25:21 -08001378
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001379 int cgroup_uid;
1380 if (!GetUsernsOutsideId(config->uid_map, config->cgroup_owner, &cgroup_uid))
1381 return -1;
1382 int cgroup_gid;
1383 if (!GetUsernsOutsideId(config->gid_map, config->cgroup_group, &cgroup_gid))
1384 return -1;
Stephen Barber1a398c72017-01-23 12:39:44 -08001385
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001386 c->cgroup = libcontainer::Cgroup::Create(c->name,
1387 base::FilePath("/sys/fs/cgroup"),
1388 config->cgroup_parent,
1389 cgroup_uid,
1390 cgroup_gid);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001391 if (!c->cgroup)
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001392 return -1;
Dylan Reida9966422016-07-21 10:11:34 -07001393
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001394 // Must be root to modify device cgroup or mknod.
1395 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
1396 hook_callbacks;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001397 if (getuid() == 0) {
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001398 if (!config->devices.empty()) {
1399 // Create the devices in the mount namespace.
1400 auto it = hook_callbacks.insert(
1401 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1402 std::vector<libcontainer::HookCallback>()));
1403 it.first->second.emplace_back(
1404 libcontainer::AdaptCallbackToRunInNamespaces(
1405 base::Bind(&CreateDeviceNodes, base::Unretained(c),
1406 base::Unretained(config)),
1407 {CLONE_NEWNS}));
1408 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001409 if (!DeviceSetup(c, config))
1410 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001411 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001412
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001413 // Potentially run setfiles on mounts configured outside of the jail.
1414 if (!config->run_setfiles.empty()) {
1415 const base::FilePath kDataPath("/data");
1416 const base::FilePath kCachePath("/cache");
1417 std::vector<base::FilePath> destinations;
1418 for (const auto& mnt : config->mounts) {
1419 if (mnt.mount_in_ns)
1420 continue;
1421 if (mnt.flags & MS_RDONLY)
1422 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001423
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001424 // A hack to avoid setfiles on /data and /cache.
1425 if (mnt.destination == kDataPath || mnt.destination == kCachePath)
1426 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001427
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001428 destinations.emplace_back(
1429 GetPathInOuterNamespace(c->runfsroot, mnt.destination));
1430 }
1431
1432 if (!destinations.empty()) {
1433 auto it = hook_callbacks.insert(
1434 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1435 std::vector<libcontainer::HookCallback>()));
1436 it.first->second.emplace_back(
1437 libcontainer::AdaptCallbackToRunInNamespaces(
1438 base::Bind(&RunSetfilesCommand, base::Unretained(c),
1439 base::Unretained(config), destinations),
1440 {CLONE_NEWNS}));
1441 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001442 }
Dylan Reidd7229582016-04-27 17:08:40 -07001443
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001444 /* Setup CPU cgroup params. */
1445 if (config->cpu_cgparams.shares) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001446 if (!c->cgroup->SetCpuShares(config->cpu_cgparams.shares))
1447 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001448 }
1449 if (config->cpu_cgparams.period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001450 if (!c->cgroup->SetCpuQuota(config->cpu_cgparams.quota))
1451 return -1;
1452 if (!c->cgroup->SetCpuPeriod(config->cpu_cgparams.period))
1453 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001454 }
1455 if (config->cpu_cgparams.rt_period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001456 if (!c->cgroup->SetCpuRtRuntime(config->cpu_cgparams.rt_runtime))
1457 return -1;
1458 if (!c->cgroup->SetCpuRtPeriod(config->cpu_cgparams.rt_period))
1459 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001460 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001461
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001462 /* Setup and start the container with libminijail. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001463 if (!config->pid_file_path.empty())
1464 c->pid_file_path = config->pid_file_path;
1465 else if (!c->runfs.empty())
1466 c->pid_file_path = c->runfs.Append("container.pid");
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001467
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001468 if (!c->pid_file_path.empty())
1469 minijail_write_pid_file(c->jail.get(), c->pid_file_path.value().c_str());
1470 minijail_reset_signal_mask(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001471
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001472 /* Setup container namespaces. */
Stephen Barber771653f2017-10-04 23:48:57 -07001473 if (container_config_has_namespace(config, "ipc"))
1474 minijail_namespace_ipc(c->jail.get());
1475 if (container_config_has_namespace(config, "mount"))
1476 minijail_namespace_vfs(c->jail.get());
1477 if (container_config_has_namespace(config, "network"))
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001478 minijail_namespace_net(c->jail.get());
Stephen Barber771653f2017-10-04 23:48:57 -07001479 if (container_config_has_namespace(config, "pid"))
1480 minijail_namespace_pids(c->jail.get());
1481
1482 if (container_config_has_namespace(config, "user")) {
1483 minijail_namespace_user(c->jail.get());
1484 if (minijail_uidmap(c->jail.get(), config->uid_map.c_str()) != 0)
1485 return -1;
1486 if (minijail_gidmap(c->jail.get(), config->gid_map.c_str()) != 0)
1487 return -1;
1488 }
1489
1490 if (container_config_has_namespace(config, "cgroup"))
1491 minijail_namespace_cgroups(c->jail.get());
1492
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001493 if (getuid() != 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001494 minijail_namespace_user_disable_setgroups(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001495
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001496 /* Set the UID/GID inside the container if not 0. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001497 if (!GetUsernsOutsideId(config->uid_map, config->uid, nullptr))
1498 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001499 else if (config->uid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001500 minijail_change_uid(c->jail.get(), config->uid);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001501 if (!GetUsernsOutsideId(config->gid_map, config->gid, nullptr))
1502 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001503 else if (config->gid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001504 minijail_change_gid(c->jail.get(), config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001505
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001506 if (minijail_enter_pivot_root(c->jail.get(), c->runfsroot.value().c_str()) !=
1507 0) {
1508 return -1;
1509 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001510
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001511 // Add the cgroups configured above.
1512 for (int32_t i = 0; i < libcontainer::Cgroup::Type::NUM_TYPES; i++) {
1513 if (c->cgroup->has_tasks_path(i)) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001514 if (minijail_add_to_cgroup(
1515 c->jail.get(), c->cgroup->tasks_path(i).value().c_str()) != 0) {
1516 return -1;
1517 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001518 }
1519 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001520
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001521 if (!config->alt_syscall_table.empty())
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001522 minijail_use_alt_syscall(c->jail.get(), config->alt_syscall_table.c_str());
Dylan Reid837c74a2016-01-22 17:25:21 -08001523
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001524 for (int i = 0; i < config->num_rlimits; i++) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001525 const Rlimit& lim = config->rlimits[i];
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001526 if (minijail_rlimit(c->jail.get(), lim.type, lim.cur, lim.max) != 0)
1527 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001528 }
Dylan Reid93fa4602017-06-06 13:39:31 -07001529
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001530 if (!config->selinux_context.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001531 if (minijail_add_hook(c->jail.get(), &Setexeccon,
1532 const_cast<char*>(config->selinux_context.c_str()),
1533 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1534 return -1;
1535 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001536 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001537
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001538 if (config->pre_start_hook) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001539 if (minijail_add_hook(c->jail.get(), config->pre_start_hook,
1540 config->pre_start_hook_payload,
1541 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1542 return -1;
1543 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001544 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001545
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001546 // Now that all pre-requisite hooks are installed, copy the ones in the
1547 // container_config object in the correct order.
1548 for (const auto& config_hook : config->hooks) {
1549 auto it = hook_callbacks.insert(std::make_pair(
1550 config_hook.first, std::vector<libcontainer::HookCallback>()));
1551 it.first->second.insert(it.first->second.end(), config_hook.second.begin(),
1552 config_hook.second.end());
1553 }
1554
1555 c->hook_states.clear();
1556 // Reserve enough memory to hold all the hooks, so that their addresses do not
1557 // get invalidated by reallocation.
1558 c->hook_states.reserve(MINIJAIL_HOOK_EVENT_MAX);
1559 for (minijail_hook_event_t event : {MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1560 MINIJAIL_HOOK_EVENT_PRE_DROP_CAPS,
1561 MINIJAIL_HOOK_EVENT_PRE_EXECVE}) {
1562 const auto& it = hook_callbacks.find(event);
1563 if (it == hook_callbacks.end())
1564 continue;
1565 c->hook_states.emplace_back(
1566 std::make_pair(libcontainer::HookState(), it->second));
1567 if (!c->hook_states.back().first.InstallHook(c->jail.get(), event))
1568 return -1;
1569 }
1570
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001571 for (int fd : config->inherited_fds) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001572 if (minijail_preserve_fd(c->jail.get(), fd, fd) != 0)
1573 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001574 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001575
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001576 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001577 minijail_skip_remount_private(c->jail.get());
Dylan Reid3da683b2016-04-05 03:35:35 -07001578
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001579 if (!config->keep_fds_open)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001580 minijail_close_open_fds(c->jail.get());
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001581
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001582 if (config->use_capmask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001583 minijail_use_caps(c->jail.get(), config->capmask);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001584 if (config->use_capmask_ambient)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001585 minijail_set_ambient_caps(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001586 if (config->securebits_skip_mask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001587 minijail_skip_setting_securebits(c->jail.get(),
1588 config->securebits_skip_mask);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001589 }
1590 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001591
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001592 if (!config->do_init)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001593 minijail_run_as_init(c->jail.get());
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001594
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001595 std::vector<char*> argv_cstr;
1596 argv_cstr.reserve(config->program_argv.size() + 1);
1597 for (const auto& arg : config->program_argv)
1598 argv_cstr.emplace_back(const_cast<char*>(arg.c_str()));
1599 argv_cstr.emplace_back(nullptr);
1600
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001601 if (minijail_run_pid_pipes_no_preload(c->jail.get(), argv_cstr[0],
1602 argv_cstr.data(), &c->init_pid, nullptr,
1603 nullptr, nullptr) != 0) {
1604 return -1;
1605 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001606
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001607 // |hook_states| is already sorted in the correct order.
1608 for (auto& hook_state : c->hook_states) {
1609 if (!hook_state.first.WaitForHookAndRun(hook_state.second, c->init_pid))
1610 return -1;
1611 }
1612
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001613 // The container has started successfully, no need to tear it down anymore.
1614 ignore_result(teardown.Release());
1615 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001616}
1617
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001618const char* container_root(struct container* c) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001619 return c->runfs.value().c_str();
Dylan Reid837c74a2016-01-22 17:25:21 -08001620}
1621
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001622int container_pid(struct container* c) {
1623 return c->init_pid;
Dylan Reid837c74a2016-01-22 17:25:21 -08001624}
1625
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001626int container_wait(struct container* c) {
1627 int rc;
Dylan Reidcf745c52016-04-22 10:18:03 -07001628
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001629 do {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001630 rc = minijail_wait(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001631 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001632
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001633 // If the process had already been reaped, still perform teardown.
1634 if (rc == -ECHILD || rc >= 0) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001635 if (!ContainerTeardown(c))
1636 rc = -errno;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001637 }
1638 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001639}
1640
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001641int container_kill(struct container* c) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001642 if (kill(c->init_pid, SIGKILL) != 0 && errno != ESRCH) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -07001643 PLOG(ERROR) << "Failed to kill " << c->init_pid;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001644 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -07001645 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001646 return container_wait(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001647}
yusukesbbc37a72017-11-21 09:51:54 -08001648
yusukes32622542018-01-05 18:59:52 -08001649char* container_config_dump(struct container_config* c, int sort_vectors) {
yusukesbbc37a72017-11-21 09:51:54 -08001650 std::stringstream out;
yusukes32622542018-01-05 18:59:52 -08001651 DumpConfig(&out, c, sort_vectors);
yusukesbbc37a72017-11-21 09:51:54 -08001652 return strdup(out.str().c_str());
1653}