blob: 126b2c5532c3f1e78c5de7347ff3af69a70610fa [file] [log] [blame]
Luis Hector Chavez81efb332017-09-18 14:01:29 -07001// Copyright 2016 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
Dylan Reid837c74a2016-01-22 17:25:21 -08004
Dylan Reid837c74a2016-01-22 17:25:21 -08005#include <errno.h>
6#include <fcntl.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08007#include <signal.h>
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07008#include <stdint.h>
Dylan Reid837c74a2016-01-22 17:25:21 -08009#include <stdlib.h>
10#include <string.h>
11#include <sys/mount.h>
12#include <sys/stat.h>
13#include <sys/types.h>
Dylan Reid2bd9ea92016-04-07 20:57:47 -070014#include <sys/wait.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070015#include <syscall.h>
Dylan Reid837c74a2016-01-22 17:25:21 -080016#include <unistd.h>
17
Luis Hector Chavez644d2042017-09-19 18:56:44 -070018#include <map>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070019#include <memory>
Stephen Barber771653f2017-10-04 23:48:57 -070020#include <set>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070021#include <string>
Luis Hector Chavez644d2042017-09-19 18:56:44 -070022#include <utility>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070023#include <vector>
24
25#include <base/bind.h>
26#include <base/bind_helpers.h>
27#include <base/callback_helpers.h>
28#include <base/files/file_path.h>
29#include <base/files/file_util.h>
30#include <base/files/scoped_file.h>
Luis Hector Chavez835d39e2017-09-19 15:16:31 -070031#include <base/logging.h>
Luis Hector Chavez5381d002017-09-16 12:54:24 -070032#include <base/macros.h>
33#include <base/strings/string_util.h>
34#include <base/strings/stringprintf.h>
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070035#include <libminijail.h>
Luis Hector Chavez626f5c82017-09-18 11:19:32 -070036#include <scoped_minijail.h>
Mike Frysinger412dbd22017-01-06 01:50:34 -050037
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -070038#include "libcontainer/cgroup.h"
Luis Hector Chavez644d2042017-09-19 18:56:44 -070039#include "libcontainer/config.h"
Luis Hector Chavez836d7b22017-09-14 15:11:15 -070040#include "libcontainer/libcontainer.h"
Luis Hector Chavez81efb332017-09-18 14:01:29 -070041#include "libcontainer/libcontainer_util.h"
Yusuke Sato91f11f02016-12-02 16:15:13 -080042
Luis Hector Chavez5381d002017-09-16 12:54:24 -070043namespace {
44
Luis Hector Chavez81efb332017-09-18 14:01:29 -070045using libcontainer::DeviceMapperDetach;
46using libcontainer::DeviceMapperSetup;
47using libcontainer::GetUsernsOutsideId;
48using libcontainer::LoopdevDetach;
49using libcontainer::LoopdevSetup;
50using libcontainer::MakeDir;
51using libcontainer::MountExternal;
52using libcontainer::TouchFile;
Mike Frysinger412dbd22017-01-06 01:50:34 -050053
// Capacity of the argv array built for the setfiles helper. The terminating
// nullptr entry counts against this limit.
constexpr size_t kMaxNumSetfilesArgs = 128;

// Capacity of the fixed-size rlimit array stored in container_config.
// Linux defines 15 at the time of writing.
constexpr size_t kMaxRlimits = 32;
Luis Hector Chavez479b95f2016-06-06 08:01:05 -070056
Luis Hector Chavez5381d002017-09-16 12:54:24 -070057struct Mount {
58 std::string name;
59 base::FilePath source;
60 base::FilePath destination;
61 std::string type;
62 std::string data;
63 std::string verity;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070064 int flags;
65 int uid;
66 int gid;
67 int mode;
Luis Hector Chavez5381d002017-09-16 12:54:24 -070068
69 // True if mount should happen in new vfs ns.
70 bool mount_in_ns;
71
72 // True if target should be created if it doesn't exist.
73 bool create;
74
75 // True if target should be mounted via loopback.
76 bool loopback;
Dylan Reid837c74a2016-01-22 17:25:21 -080077};
78
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070079struct Device {
80 // 'c' or 'b' for char or block
81 char type;
82 base::FilePath path;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070083 int fs_permissions;
84 int major;
85 int minor;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -070086
87 // Copy the minor from existing node, ignores |minor|.
88 bool copy_minor;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -070089 int uid;
90 int gid;
Dylan Reid4843d6b2017-03-31 18:14:30 -070091};
92
// One rule for the devices cgroup of the container.
struct CgroupDevice {
  // Whether the rule allows access. NOTE(review): presumably false means
  // "deny" -- confirm against the cgroup implementation.
  bool allow;

  // Device kind the rule applies to. NOTE(review): presumably 'c' or 'b' as
  // for Device::type -- confirm.
  char type;

  // Device numbers the rule applies to.
  // -1 for either major or minor means all.
  int major;
  int minor;

  // Access kinds covered by the rule.
  bool read;
  bool write;
  bool modify;
};
105
// CPU cgroup parameters for the container.
// NOTE(review): the fields presumably correspond to the cpu controller knobs
// of the same names (shares, CFS quota/period, RT runtime/period) -- confirm
// against the code that applies them.
struct CpuCgroup {
  int shares;
  int quota;
  int period;
  int rt_runtime;
  int rt_period;
};
113
// A single resource limit for the contained process.
// NOTE(review): |type| is presumably an RLIMIT_* constant and |cur| / |max|
// the soft and hard limits -- confirm against the code that applies them.
struct Rlimit {
  int type;
  uint32_t cur;
  uint32_t max;
};
119
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700120} // namespace
121
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700122// Structure that configures how the container is run.
Dylan Reid837c74a2016-01-22 17:25:21 -0800123struct container_config {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700124 // Path to the root of the container itself.
125 base::FilePath config_root;
126
127 // Path to the root of the container's filesystem.
128 base::FilePath rootfs;
129
130 // Flags that will be passed to mount() for the rootfs.
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700131 unsigned long rootfs_mount_flags;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700132
133 // Path to where the container will be run.
134 base::FilePath premounted_runfs;
135
136 // Path to the file where the pid should be written.
137 base::FilePath pid_file_path;
138
139 // The program to run and args, e.g. "/sbin/init".
140 std::vector<std::string> program_argv;
141
142 // The uid the container will run as.
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700143 uid_t uid;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700144
145 // Mapping of UIDs in the container, e.g. "0 100000 1024"
146 std::string uid_map;
147
148 // The gid the container will run as.
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700149 gid_t gid;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700150
151 // Mapping of GIDs in the container, e.g. "0 100000 1024"
152 std::string gid_map;
153
154 // Syscall table to use or nullptr if none.
155 std::string alt_syscall_table;
156
157 // Filesystems to mount in the new namespace.
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700158 std::vector<Mount> mounts;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700159
Stephen Barber771653f2017-10-04 23:48:57 -0700160 // Namespaces that should be used for the container.
161 std::set<std::string> namespaces;
162
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700163 // Device nodes to create.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700164 std::vector<Device> devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700165
166 // Device node cgroup permissions.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700167 std::vector<CgroupDevice> cgroup_devices;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700168
169 // Should run setfiles on mounts to enable selinux.
170 std::string run_setfiles;
171
172 // CPU cgroup params.
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700173 CpuCgroup cpu_cgparams;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700174
175 // Parent dir for cgroup creation
176 base::FilePath cgroup_parent;
177
178 // uid to own the created cgroups
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700179 uid_t cgroup_owner;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700180
181 // gid to own the created cgroups
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700182 gid_t cgroup_group;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700183
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700184 // Allow the child process to keep open FDs (for stdin/out/err).
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700185 int keep_fds_open;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700186
187 // Array of rlimits for the contained process.
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700188 Rlimit rlimits[kMaxRlimits];
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700189
190 // The number of elements in `rlimits`.
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700191 int num_rlimits;
192 int use_capmask;
193 int use_capmask_ambient;
194 uint64_t capmask;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700195
196 // The mask of securebits to skip when restricting caps.
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700197 uint64_t securebits_skip_mask;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700198
199 // Whether the container needs an extra process to be run as init.
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700200 int do_init;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700201
202 // The SELinux context name the container will run under.
203 std::string selinux_context;
204
205 // A function pointer to be called prior to calling execve(2).
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700206 minijail_hook_t pre_start_hook;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700207
208 // Parameter that will be passed to pre_start_hook().
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700209 void* pre_start_hook_payload;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700210
Luis Hector Chaveze03926a2017-09-28 17:28:49 -0700211 // A list of file descriptors to inherit.
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700212 std::vector<int> inherited_fds;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700213
214 // A list of hooks that will be called upon minijail reaching various states
215 // of execution.
216 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
217 hooks;
Dylan Reid837c74a2016-01-22 17:25:21 -0800218};
219
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700220// Container manipulation
221struct container {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700222 std::unique_ptr<libcontainer::Cgroup> cgroup;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700223 ScopedMinijail jail;
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700224 pid_t init_pid = -1;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700225 base::FilePath config_root;
226 base::FilePath runfs;
227 base::FilePath rundir;
228 base::FilePath runfsroot;
229 base::FilePath pid_file_path;
230
231 // Mounts made outside of the minijail.
232 std::vector<base::FilePath> ext_mounts;
233 std::vector<base::FilePath> loopdev_paths;
234 std::vector<std::string> device_mappers;
235 std::string name;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700236
237 std::vector<std::pair<libcontainer::HookState,
238 std::vector<libcontainer::HookCallback>>>
239 hook_states;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700240};
241
242namespace {
243
244// Returns the path for |path_in_container| in the outer namespace.
245base::FilePath GetPathInOuterNamespace(
246 const base::FilePath& root, const base::FilePath& path_in_container) {
247 if (path_in_container.IsAbsolute())
248 return base::FilePath(root.value() + path_in_container.value());
249 return root.Append(path_in_container);
250}
251
252// Make sure the mount target exists in the new rootfs. Create if needed and
253// possible.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700254bool SetupMountDestination(const struct container_config* config,
255 const Mount& mount,
256 const base::FilePath& source,
257 const base::FilePath& dest) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700258 struct stat st_buf;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700259 if (stat(dest.value().c_str(), &st_buf) == 0) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700260 // destination exists.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700261 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700262 }
263
264 // Try to create the destination. Either make directory or touch a file
265 // depending on the source type.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700266 int uid_userns;
267 if (!GetUsernsOutsideId(config->uid_map, mount.uid, &uid_userns))
268 return false;
269 int gid_userns;
270 if (!GetUsernsOutsideId(config->gid_map, mount.gid, &gid_userns))
271 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700272
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700273 if (stat(source.value().c_str(), &st_buf) != 0 || S_ISDIR(st_buf.st_mode) ||
274 S_ISBLK(st_buf.st_mode)) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700275 return MakeDir(dest, uid_userns, gid_userns, mount.mode);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700276 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700277
278 return TouchFile(dest, uid_userns, gid_userns, mount.mode);
279}
280
281// Fork and exec the setfiles command to configure the selinux policy.
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700282bool RunSetfilesCommand(const struct container* c,
283 const struct container_config* config,
284 const std::vector<base::FilePath>& destinations,
285 pid_t container_pid) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700286 int pid = fork();
287 if (pid == 0) {
288 size_t arg_index = 0;
289 const char* argv[kMaxNumSetfilesArgs];
290 const char* env[] = {
291 nullptr,
292 };
293
294 base::FilePath context_path = c->runfsroot.Append("file_contexts");
295
296 argv[arg_index++] = config->run_setfiles.c_str();
297 argv[arg_index++] = "-r";
298 argv[arg_index++] = c->runfsroot.value().c_str();
299 argv[arg_index++] = context_path.value().c_str();
300 if (arg_index + destinations.size() >= kMaxNumSetfilesArgs)
301 _exit(-E2BIG);
302 for (const auto& destination : destinations)
303 argv[arg_index++] = destination.value().c_str();
304 argv[arg_index] = nullptr;
305
306 execve(
307 argv[0], const_cast<char* const*>(argv), const_cast<char* const*>(env));
308
309 /* Command failed to exec if execve returns. */
310 _exit(-errno);
311 }
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700312 if (pid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700313 PLOG(ERROR) << "Failed to fork to run setfiles";
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700314 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700315 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700316
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700317 int status;
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700318 if (HANDLE_EINTR(waitpid(pid, &status, 0)) < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700319 PLOG(ERROR) << "Failed to wait for setfiles";
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700320 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700321 }
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700322 if (!WIFEXITED(status)) {
323 LOG(ERROR) << "setfiles did not terminate cleanly";
324 return false;
325 }
326 if (WEXITSTATUS(status) != 0) {
327 LOG(ERROR) << "setfiles exited with non-zero status: "
328 << WEXITSTATUS(status);
329 return false;
330 }
331 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700332}
333
334// Unmounts anything we mounted in this mount namespace in the opposite order
335// that they were mounted.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700336bool UnmountExternalMounts(struct container* c) {
337 bool ret = true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700338
339 for (auto it = c->ext_mounts.rbegin(); it != c->ext_mounts.rend(); ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700340 if (umount(it->value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700341 PLOG(ERROR) << "Failed to unmount " << it->value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700342 ret = false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700343 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700344 }
345 c->ext_mounts.clear();
346
347 for (auto it = c->loopdev_paths.rbegin(); it != c->loopdev_paths.rend();
348 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700349 if (!LoopdevDetach(*it))
350 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700351 }
352 c->loopdev_paths.clear();
353
354 for (auto it = c->device_mappers.rbegin(); it != c->device_mappers.rend();
355 ++it) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700356 if (!DeviceMapperDetach(*it))
357 ret = false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700358 }
359 c->device_mappers.clear();
360
361 return ret;
362}
363
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700364bool DoContainerMount(struct container* c,
365 const struct container_config* config,
366 const Mount& mount) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700367 base::FilePath dest =
368 GetPathInOuterNamespace(c->runfsroot, mount.destination);
369
370 // If it's a bind mount relative to rootfs, append source to
371 // rootfs path, otherwise source path is absolute.
372 base::FilePath source;
373 if ((mount.flags & MS_BIND) && !mount.source.IsAbsolute()) {
374 source = GetPathInOuterNamespace(c->runfsroot, mount.source);
375 } else if (mount.loopback && !mount.source.IsAbsolute() &&
376 !c->config_root.empty()) {
377 source = GetPathInOuterNamespace(c->config_root, mount.source);
378 } else {
379 source = mount.source;
380 }
381
382 // Only create the destinations for external mounts, minijail will take
383 // care of those mounted in the new namespace.
384 if (mount.create && !mount.mount_in_ns) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700385 if (!SetupMountDestination(config, mount, source, dest))
386 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700387 }
388 if (mount.loopback) {
389 // Record this loopback file for cleanup later.
390 base::FilePath loop_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700391 if (!LoopdevSetup(loop_source, &source))
392 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700393
394 // Save this to cleanup when shutting down.
395 c->loopdev_paths.push_back(source);
396 }
397 if (!mount.verity.empty()) {
398 // Set this device up via dm-verity.
399 std::string dm_name;
400 base::FilePath dm_source = source;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700401 if (!DeviceMapperSetup(dm_source, mount.verity, &source, &dm_name))
402 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700403
404 // Save this to cleanup when shutting down.
405 c->device_mappers.push_back(dm_name);
406 }
407 if (mount.mount_in_ns) {
408 // We can mount this with minijail.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700409 if (minijail_mount_with_data(
410 c->jail.get(), source.value().c_str(),
411 mount.destination.value().c_str(), mount.type.c_str(), mount.flags,
412 mount.data.empty() ? nullptr : mount.data.c_str()) != 0) {
413 return false;
414 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700415 } else {
416 // Mount this externally and unmount it on exit.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700417 if (!MountExternal(source.value(), dest.value(), mount.type, mount.flags,
418 mount.data)) {
419 return false;
420 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700421 // Save this to unmount when shutting down.
422 c->ext_mounts.push_back(dest);
423 }
424
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700425 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700426}
427
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700428bool DoContainerMounts(struct container* c,
429 const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700430 UnmountExternalMounts(c);
431
432 // This will run in all the error cases.
433 base::ScopedClosureRunner teardown(base::Bind(
434 base::IgnoreResult(&UnmountExternalMounts), base::Unretained(c)));
435
436 for (const auto& mount : config->mounts) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700437 if (!DoContainerMount(c, config, mount))
438 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700439 }
440
441 // The mounts have been done successfully, no need to tear them down anymore.
442 ignore_result(teardown.Release());
443
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700444 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700445}
446
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700447bool ContainerCreateDevice(const struct container* c,
448 const struct container_config* config,
449 const Device& dev,
450 int minor) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700451 mode_t mode = dev.fs_permissions;
452 switch (dev.type) {
453 case 'b':
454 mode |= S_IFBLK;
455 break;
456 case 'c':
457 mode |= S_IFCHR;
458 break;
459 default:
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700460 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700461 }
462
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700463 int uid_userns;
464 if (!GetUsernsOutsideId(config->uid_map, dev.uid, &uid_userns))
465 return false;
466 int gid_userns;
467 if (!GetUsernsOutsideId(config->gid_map, dev.gid, &gid_userns))
468 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700469
470 base::FilePath path = GetPathInOuterNamespace(c->runfsroot, dev.path);
Luis Hector Chavez5d51abb2017-10-11 17:05:57 -0700471 if (!base::CreateDirectory(path.DirName())) {
472 PLOG(ERROR) << "Failed to create parent directory for " << path.value();
473 return false;
474 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700475 if (mknod(path.value().c_str(), mode, makedev(dev.major, minor)) != 0 &&
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700476 errno != EEXIST) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700477 PLOG(ERROR) << "Failed to mknod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700478 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700479 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700480 if (chown(path.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700481 PLOG(ERROR) << "Failed to chown " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700482 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700483 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700484 if (chmod(path.value().c_str(), dev.fs_permissions) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700485 PLOG(ERROR) << "Failed to chmod " << path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700486 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700487 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700488
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700489 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700490}
491
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700492bool MountRunfs(struct container* c, const struct container_config* config) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700493 {
494 std::string runfs_template = base::StringPrintf(
495 "%s/%s_XXXXXX", c->rundir.value().c_str(), c->name.c_str());
496 // TODO(lhchavez): Replace this with base::CreateTemporaryDirInDir().
497 char* runfs_path = mkdtemp(const_cast<char*>(runfs_template.c_str()));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700498 if (!runfs_path) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700499 PLOG(ERROR) << "Failed to mkdtemp in " << c->rundir.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700500 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700501 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700502 c->runfs = base::FilePath(runfs_path);
503 }
504
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700505 int uid_userns;
506 if (!GetUsernsOutsideId(config->uid_map, config->uid, &uid_userns))
507 return false;
508 int gid_userns;
509 if (!GetUsernsOutsideId(config->gid_map, config->gid, &gid_userns))
510 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700511
512 // Make sure the container uid can access the rootfs.
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700513 if (chmod(c->runfs.value().c_str(), 0700) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700514 PLOG(ERROR) << "Failed to chmod " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700515 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700516 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700517 if (chown(c->runfs.value().c_str(), uid_userns, gid_userns) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700518 PLOG(ERROR) << "Failed to chown " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700519 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700520 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700521
522 c->runfsroot = c->runfs.Append("root");
523
524 constexpr mode_t kRootDirMode = 0660;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700525 if (mkdir(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700526 PLOG(ERROR) << "Failed to mkdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700527 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700528 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700529 if (chmod(c->runfsroot.value().c_str(), kRootDirMode) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700530 PLOG(ERROR) << "Failed to chmod " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700531 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700532 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700533
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700534 if (mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
535 MS_BIND | (config->rootfs_mount_flags & MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700536 PLOG(ERROR) << "Failed to bind-mount " << config->rootfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700537 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700538 }
539
540 // MS_BIND ignores any flags passed to it (except MS_REC). We need a
541 // second call to mount() to actually set them.
542 if (config->rootfs_mount_flags &&
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700543 mount(config->rootfs.value().c_str(), c->runfsroot.value().c_str(), "",
544 (config->rootfs_mount_flags & ~MS_REC), nullptr) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700545 PLOG(ERROR) << "Failed to remount " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700546 return false;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700547 }
548
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700549 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700550}
551
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700552bool CreateDeviceNodes(struct container* c,
553 const struct container_config* config,
554 pid_t container_pid) {
555 for (const auto& dev : config->devices) {
556 int minor = dev.minor;
557
558 if (dev.copy_minor) {
559 struct stat st_buff;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700560 if (stat(dev.path.value().c_str(), &st_buff) != 0)
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700561 continue;
562 minor = minor(st_buff.st_rdev);
563 }
564 if (minor < 0)
565 continue;
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700566 if (!ContainerCreateDevice(c, config, dev, minor))
Luis Hector Chavez644d2042017-09-19 18:56:44 -0700567 return false;
568 }
569
570 return true;
571}
572
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700573bool DeviceSetup(struct container* c, const struct container_config* config) {
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -0700574 c->cgroup->DenyAllDevices();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700575
576 for (const auto& dev : config->cgroup_devices) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700577 if (!c->cgroup->AddDevice(dev.allow, dev.major, dev.minor, dev.read,
578 dev.write, dev.modify, dev.type)) {
579 return false;
580 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700581 }
582
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700583 for (const auto& loopdev_path : c->loopdev_paths) {
584 struct stat st;
585
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700586 if (stat(loopdev_path.value().c_str(), &st) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700587 PLOG(ERROR) << "Failed to stat " << loopdev_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700588 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700589 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700590 if (!c->cgroup->AddDevice(1, major(st.st_rdev), minor(st.st_rdev), 1, 0, 0,
591 'b')) {
592 return false;
593 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700594 }
595
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700596 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700597}
598
599int Setexeccon(void* payload) {
600 char* init_domain = reinterpret_cast<char*>(payload);
601 pid_t tid = syscall(SYS_gettid);
602
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700603 if (tid < 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700604 PLOG(ERROR) << "Failed to gettid";
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700605 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700606 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700607
608 std::string exec_path =
609 base::StringPrintf("/proc/self/task/%d/attr/exec", tid);
610
611 base::ScopedFD fd(open(exec_path.c_str(), O_WRONLY | O_CLOEXEC));
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700612 if (!fd.is_valid()) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700613 PLOG(ERROR) << "Failed to open " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700614 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700615 }
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700616
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700617 if (!base::WriteFileDescriptor(fd.get(), init_domain, strlen(init_domain))) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700618 PLOG(ERROR) << "Failed to write the SELinux label to " << exec_path;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700619 return -errno;
620 }
621
622 return 0;
623}
624
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700625bool ContainerTeardown(struct container* c) {
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700626 UnmountExternalMounts(c);
627 if (!c->runfsroot.empty() && !c->runfs.empty()) {
628 /* |c->runfsroot| may have been mounted recursively. Thus use
629 * MNT_DETACH to "immediately disconnect the filesystem and all
630 * filesystems mounted below it from each other and from the
631 * mount table". Otherwise one would need to unmount every
632 * single dependent mount before unmounting |c->runfsroot|
633 * itself.
634 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700635 if (umount2(c->runfsroot.value().c_str(), MNT_DETACH) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700636 PLOG(ERROR) << "Failed to detach " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700637 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700638 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700639 if (rmdir(c->runfsroot.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700640 PLOG(ERROR) << "Failed to rmdir " << c->runfsroot.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700641 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700642 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700643 c->runfsroot = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700644 }
645 if (!c->pid_file_path.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700646 if (unlink(c->pid_file_path.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700647 PLOG(ERROR) << "Failed to unlink " << c->pid_file_path.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700648 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700649 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700650 c->pid_file_path = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700651 }
652 if (!c->runfs.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700653 if (rmdir(c->runfs.value().c_str()) != 0) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -0700654 PLOG(ERROR) << "Failed to rmdir " << c->runfs.value();
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700655 return false;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -0700656 }
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700657 c->runfs = base::FilePath();
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700658 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700659 return true;
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700660}
661
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -0700662void CancelContainerStart(struct container* c) {
663 if (c->init_pid != -1)
664 container_kill(c);
665 ContainerTeardown(c);
666}
Luis Hector Chavez81efb332017-09-18 14:01:29 -0700667
668} // namespace
669
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700670struct container_config* container_config_create() {
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700671 return new (std::nothrow) struct container_config();
Dylan Reid837c74a2016-01-22 17:25:21 -0800672}
673
// Deletes |c|. Passing nullptr is allowed and does nothing.
void container_config_destroy(struct container_config* c) {
  if (c != nullptr) {
    delete c;
  }
}
679
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700680int container_config_config_root(struct container_config* c,
681 const char* config_root) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700682 c->config_root = base::FilePath(config_root);
683 return 0;
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500684}
685
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700686const char* container_config_get_config_root(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700687 return c->config_root.value().c_str();
Mike Frysingerb22acdf2017-01-08 02:02:35 -0500688}
689
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700690int container_config_rootfs(struct container_config* c, const char* rootfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700691 c->rootfs = base::FilePath(rootfs);
692 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800693}
694
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700695const char* container_config_get_rootfs(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700696 return c->rootfs.value().c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700697}
698
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700699void container_config_rootfs_mount_flags(struct container_config* c,
700 unsigned long rootfs_mount_flags) {
701 /* Since we are going to add MS_REMOUNT anyways, add it here so we can
702 * simply check against zero later. MS_BIND is also added to avoid
703 * re-mounting the original filesystem, since the rootfs is always
704 * bind-mounted.
705 */
706 c->rootfs_mount_flags = MS_REMOUNT | MS_BIND | rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700707}
708
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700709unsigned long container_config_get_rootfs_mount_flags(
710 const struct container_config* c) {
711 return c->rootfs_mount_flags;
Luis Hector Chavezc240e7e2016-09-22 10:33:03 -0700712}
713
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700714int container_config_premounted_runfs(struct container_config* c,
715 const char* runfs) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700716 c->premounted_runfs = base::FilePath(runfs);
717 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700718}
719
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700720const char* container_config_get_premounted_runfs(
721 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700722 return c->premounted_runfs.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700723}
724
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700725int container_config_pid_file(struct container_config* c, const char* path) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700726 c->pid_file_path = base::FilePath(path);
727 return 0;
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700728}
729
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700730const char* container_config_get_pid_file(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700731 return c->pid_file_path.value().c_str();
Keshav Santhanam0e4c3282016-07-14 10:25:16 -0700732}
733
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700734int container_config_program_argv(struct container_config* c,
735 const char** argv,
736 size_t num_args) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700737 if (num_args < 1) {
738 errno = EINVAL;
739 return -1;
740 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700741 c->program_argv.clear();
742 c->program_argv.reserve(num_args);
743 for (size_t i = 0; i < num_args; ++i)
744 c->program_argv.emplace_back(argv[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700745 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800746}
747
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700748size_t container_config_get_num_program_args(const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700749 return c->program_argv.size();
Dylan Reid11456722016-05-02 11:24:50 -0700750}
751
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700752const char* container_config_get_program_arg(const struct container_config* c,
753 size_t index) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700754 if (index >= c->program_argv.size())
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700755 return nullptr;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700756 return c->program_argv[index].c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700757}
758
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700759void container_config_uid(struct container_config* c, uid_t uid) {
760 c->uid = uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700761}
762
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700763uid_t container_config_get_uid(const struct container_config* c) {
764 return c->uid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700765}
766
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700767int container_config_uid_map(struct container_config* c, const char* uid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700768 c->uid_map = uid_map;
769 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800770}
771
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700772void container_config_gid(struct container_config* c, gid_t gid) {
773 c->gid = gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700774}
775
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700776gid_t container_config_get_gid(const struct container_config* c) {
777 return c->gid;
Dylan Reid1874feb2016-06-22 17:53:50 -0700778}
779
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700780int container_config_gid_map(struct container_config* c, const char* gid_map) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700781 c->gid_map = gid_map;
782 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800783}
784
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700785int container_config_alt_syscall_table(struct container_config* c,
786 const char* alt_syscall_table) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700787 c->alt_syscall_table = alt_syscall_table;
788 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800789}
790
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700791int container_config_add_rlimit(struct container_config* c,
792 int type,
793 uint32_t cur,
794 uint32_t max) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700795 if (c->num_rlimits >= kMaxRlimits) {
796 errno = ENOMEM;
797 return -1;
798 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700799 c->rlimits[c->num_rlimits].type = type;
800 c->rlimits[c->num_rlimits].cur = cur;
801 c->rlimits[c->num_rlimits].max = max;
802 c->num_rlimits++;
803 return 0;
Dylan Reid93fa4602017-06-06 13:39:31 -0700804}
805
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700806int container_config_add_mount(struct container_config* c,
807 const char* name,
808 const char* source,
809 const char* destination,
810 const char* type,
811 const char* data,
812 const char* verity,
813 int flags,
814 int uid,
815 int gid,
816 int mode,
817 int mount_in_ns,
818 int create,
819 int loopback) {
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700820 if (name == nullptr || source == nullptr || destination == nullptr ||
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700821 type == nullptr) {
822 errno = EINVAL;
823 return -1;
824 }
Dylan Reid837c74a2016-01-22 17:25:21 -0800825
Luis Hector Chavez5381d002017-09-16 12:54:24 -0700826 c->mounts.emplace_back(Mount{name,
827 base::FilePath(source),
828 base::FilePath(destination),
829 type,
830 data ? data : "",
831 verity ? verity : "",
832 flags,
833 uid,
834 gid,
835 mode,
836 mount_in_ns != 0,
837 create != 0,
838 loopback != 0});
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700839
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700840 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800841}
842
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700843int container_config_add_cgroup_device(struct container_config* c,
844 int allow,
845 char type,
846 int major,
847 int minor,
848 int read,
849 int write,
850 int modify) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700851 c->cgroup_devices.emplace_back(CgroupDevice{
852 allow != 0, type, major, minor, read != 0, write != 0, modify != 0});
Dylan Reid4843d6b2017-03-31 18:14:30 -0700853
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700854 return 0;
Dylan Reid4843d6b2017-03-31 18:14:30 -0700855}
856
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700857int container_config_add_device(struct container_config* c,
858 char type,
859 const char* path,
860 int fs_permissions,
861 int major,
862 int minor,
863 int copy_minor,
864 int uid,
865 int gid,
866 int read_allowed,
867 int write_allowed,
868 int modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700869 if (path == nullptr) {
870 errno = EINVAL;
871 return -1;
872 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700873 /* If using a dynamic minor number, ensure that minor is -1. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700874 if (copy_minor && (minor != -1)) {
875 errno = EINVAL;
876 return -1;
877 }
Dylan Reid355d5e42016-04-29 16:53:31 -0700878
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700879 if (read_allowed || write_allowed || modify_allowed) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700880 if (container_config_add_cgroup_device(c, 1, type, major, minor,
881 read_allowed, write_allowed,
882 modify_allowed) != 0) {
883 errno = ENOMEM;
884 return -1;
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700885 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700886 }
Luis Hector Chavez479b95f2016-06-06 08:01:05 -0700887
Luis Hector Chaveze1062e82017-09-18 09:57:37 -0700888 c->devices.emplace_back(Device{
889 type,
890 base::FilePath(path),
891 fs_permissions,
892 major,
893 minor,
894 copy_minor != 0,
895 uid,
896 gid,
897 });
898
899 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -0800900}
901
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700902int container_config_run_setfiles(struct container_config* c,
903 const char* setfiles_cmd) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700904 c->run_setfiles = setfiles_cmd;
905 return 0;
Dylan Reid2bd9ea92016-04-07 20:57:47 -0700906}
Dylan Reid837c74a2016-01-22 17:25:21 -0800907
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700908const char* container_config_get_run_setfiles(
909 const struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700910 return c->run_setfiles.c_str();
Dylan Reid11456722016-05-02 11:24:50 -0700911}
912
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700913int container_config_set_cpu_shares(struct container_config* c, int shares) {
914 /* CPU shares must be 2 or higher. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700915 if (shares < 2) {
916 errno = EINVAL;
917 return -1;
918 }
Chinyue Chenfac909e2016-06-24 14:17:42 +0800919
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700920 c->cpu_cgparams.shares = shares;
921 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800922}
923
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700924int container_config_set_cpu_cfs_params(struct container_config* c,
925 int quota,
926 int period) {
927 /*
928 * quota could be set higher than period to utilize more than one CPU.
929 * quota could also be set as -1 to indicate the cgroup does not adhere
930 * to any CPU time restrictions.
931 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700932 if (quota <= 0 && quota != -1) {
933 errno = EINVAL;
934 return -1;
935 }
936 if (period <= 0) {
937 errno = EINVAL;
938 return -1;
939 }
Chinyue Chenfac909e2016-06-24 14:17:42 +0800940
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700941 c->cpu_cgparams.quota = quota;
942 c->cpu_cgparams.period = period;
943 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800944}
945
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700946int container_config_set_cpu_rt_params(struct container_config* c,
947 int rt_runtime,
948 int rt_period) {
949 /*
950 * rt_runtime could be set as 0 to prevent the cgroup from using
951 * realtime CPU.
952 */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -0700953 if (rt_runtime < 0 || rt_runtime >= rt_period) {
954 errno = EINVAL;
955 return -1;
956 }
Chinyue Chenfac909e2016-06-24 14:17:42 +0800957
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700958 c->cpu_cgparams.rt_runtime = rt_runtime;
959 c->cpu_cgparams.rt_period = rt_period;
960 return 0;
Chinyue Chenfac909e2016-06-24 14:17:42 +0800961}
962
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700963int container_config_get_cpu_shares(struct container_config* c) {
964 return c->cpu_cgparams.shares;
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800965}
966
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700967int container_config_get_cpu_quota(struct container_config* c) {
968 return c->cpu_cgparams.quota;
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800969}
970
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700971int container_config_get_cpu_period(struct container_config* c) {
972 return c->cpu_cgparams.period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800973}
974
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700975int container_config_get_cpu_rt_runtime(struct container_config* c) {
976 return c->cpu_cgparams.rt_runtime;
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800977}
978
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700979int container_config_get_cpu_rt_period(struct container_config* c) {
980 return c->cpu_cgparams.rt_period;
Chinyue Chen4f3fd682016-07-01 14:11:42 +0800981}
982
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700983int container_config_set_cgroup_parent(struct container_config* c,
984 const char* parent,
985 uid_t cgroup_owner,
986 gid_t cgroup_group) {
987 c->cgroup_owner = cgroup_owner;
988 c->cgroup_group = cgroup_group;
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700989 c->cgroup_parent = base::FilePath(parent);
990 return 0;
Dylan Reid9e724af2016-07-21 09:58:07 -0700991}
992
Luis Hector Chavez31735bc2017-09-15 08:17:10 -0700993const char* container_config_get_cgroup_parent(struct container_config* c) {
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -0700994 return c->cgroup_parent.value().c_str();
Dylan Reid9e724af2016-07-21 09:58:07 -0700995}
996
Stephen Barber771653f2017-10-04 23:48:57 -0700997int container_config_namespaces(struct container_config* c,
998 const char** namespaces,
999 size_t num_ns) {
1000 if (num_ns < 1)
1001 return -EINVAL;
1002 c->namespaces.clear();
1003 for (size_t i = 0; i < num_ns; ++i)
1004 c->namespaces.emplace(namespaces[i]);
1005 return 0;
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001006}
1007
Stephen Barber771653f2017-10-04 23:48:57 -07001008size_t container_config_get_num_namespaces(const struct container_config* c) {
1009 return c->namespaces.size();
1010}
1011
1012bool container_config_has_namespace(const struct container_config* c,
1013 const char* ns) {
1014 return c->namespaces.find(ns) != c->namespaces.end();
Keshav Santhanam1b6bf672016-08-10 18:35:12 -07001015}
1016
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001017void container_config_keep_fds_open(struct container_config* c) {
1018 c->keep_fds_open = 1;
Dylan Reidc4335842016-11-11 10:24:52 -08001019}
1020
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001021void container_config_set_capmask(struct container_config* c,
1022 uint64_t capmask,
1023 int ambient) {
1024 c->use_capmask = 1;
1025 c->capmask = capmask;
1026 c->use_capmask_ambient = ambient;
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001027}
1028
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001029void container_config_set_securebits_skip_mask(struct container_config* c,
1030 uint64_t securebits_skip_mask) {
1031 c->securebits_skip_mask = securebits_skip_mask;
Luis Hector Chavezcd44ba72017-06-30 13:01:38 -07001032}
1033
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001034void container_config_set_run_as_init(struct container_config* c,
1035 int run_as_init) {
1036 c->do_init = !run_as_init;
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001037}
1038
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001039int container_config_set_selinux_context(struct container_config* c,
1040 const char* context) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001041 if (!context) {
1042 errno = EINVAL;
1043 return -1;
1044 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001045 c->selinux_context = context;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001046 return 0;
Luis Hector Chavez15e8e672017-07-20 15:13:27 -07001047}
1048
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001049void container_config_set_pre_execve_hook(struct container_config* c,
1050 int (*hook)(void*),
1051 void* payload) {
1052 c->pre_start_hook = hook;
1053 c->pre_start_hook_payload = payload;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001054}
1055
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001056void container_config_add_hook(struct container_config* c,
1057 minijail_hook_event_t event,
1058 libcontainer::HookCallback callback) {
1059 auto it = c->hooks.insert(
1060 std::make_pair(event, std::vector<libcontainer::HookCallback>()));
1061 it.first->second.emplace_back(std::move(callback));
1062}
1063
Luis Hector Chaveze03926a2017-09-28 17:28:49 -07001064int container_config_add_hook(struct container_config* c,
1065 minijail_hook_event_t event,
1066 const char* filename,
1067 const char** argv,
1068 size_t num_args,
1069 int* pstdin_fd,
1070 int* pstdout_fd,
1071 int* pstderr_fd) {
1072 std::vector<std::string> args;
1073 args.reserve(num_args);
1074 for (size_t i = 0; i < num_args; ++i)
1075 args.emplace_back(argv[i]);
1076
1077 // First element of the array belongs to the parent and the second one belongs
1078 // to the child.
1079 base::ScopedFD stdin_fds[2], stdout_fds[2], stderr_fds[2];
1080 if (pstdin_fd) {
1081 if (!libcontainer::Pipe2(&stdin_fds[1], &stdin_fds[0], 0))
1082 return -1;
1083 }
1084 if (pstdout_fd) {
1085 if (!libcontainer::Pipe2(&stdout_fds[0], &stdout_fds[0], 0))
1086 return -1;
1087 }
1088 if (pstderr_fd) {
1089 if (!libcontainer::Pipe2(&stderr_fds[0], &stderr_fds[0], 0))
1090 return -1;
1091 }
1092
1093 // After this point the call has been successful, so we can now commit to
1094 // whatever pipes we have opened.
1095 if (pstdin_fd) {
1096 *pstdin_fd = stdin_fds[0].release();
1097 c->inherited_fds.emplace_back(stdin_fds[1].get());
1098 }
1099 if (pstdout_fd) {
1100 *pstdout_fd = stdout_fds[0].release();
1101 c->inherited_fds.emplace_back(stdout_fds[1].get());
1102 }
1103 if (pstderr_fd) {
1104 *pstderr_fd = stderr_fds[0].release();
1105 c->inherited_fds.emplace_back(stderr_fds[1].get());
1106 }
1107 container_config_add_hook(
1108 c, event,
1109 libcontainer::CreateExecveCallback(
1110 base::FilePath(filename), args, std::move(stdin_fds[1]),
1111 std::move(stdout_fds[1]), std::move(stderr_fds[1])));
1112 return 0;
1113}
1114
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001115int container_config_inherit_fds(struct container_config* c,
1116 int* inherited_fds,
1117 size_t inherited_fd_count) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001118 if (!c->inherited_fds.empty()) {
1119 errno = EINVAL;
1120 return -1;
1121 }
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001122 for (size_t i = 0; i < inherited_fd_count; ++i)
1123 c->inherited_fds.emplace_back(inherited_fds[i]);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001124 return 0;
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001125}
1126
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001127struct container* container_new(const char* name, const char* rundir) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001128 struct container* c = new (std::nothrow) container();
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001129 if (!c)
1130 return nullptr;
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001131 c->rundir = base::FilePath(rundir);
1132 c->name = name;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001133 return c;
Dylan Reid837c74a2016-01-22 17:25:21 -08001134}
1135
// Deletes |c|. No other cleanup is performed here.
void container_destroy(struct container* c) {
  delete c;
}
1139
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001140int container_start(struct container* c,
1141 const struct container_config* config) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001142 if (!c) {
1143 errno = EINVAL;
1144 return -1;
1145 }
1146 if (!config) {
1147 errno = EINVAL;
1148 return -1;
1149 }
1150 if (config->program_argv.empty()) {
1151 errno = EINVAL;
1152 return -1;
1153 }
Dylan Reide040c6b2016-05-02 18:49:02 -07001154
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001155 // This will run in all the error cases.
1156 base::ScopedClosureRunner teardown(
Luis Hector Chavez15d0d1a2017-10-12 09:30:19 -07001157 base::Bind(&CancelContainerStart, base::Unretained(c)));
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001158
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001159 if (!config->config_root.empty())
1160 c->config_root = config->config_root;
1161 if (!config->premounted_runfs.empty()) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001162 c->runfs.clear();
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001163 c->runfsroot = config->premounted_runfs;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001164 } else {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001165 if (!MountRunfs(c, config))
1166 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001167 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001168
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001169 c->jail.reset(minijail_new());
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001170 if (!c->jail) {
1171 errno = ENOMEM;
1172 return -1;
1173 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001174
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001175 if (!DoContainerMounts(c, config))
1176 return -1;
Dylan Reid837c74a2016-01-22 17:25:21 -08001177
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001178 int cgroup_uid;
1179 if (!GetUsernsOutsideId(config->uid_map, config->cgroup_owner, &cgroup_uid))
1180 return -1;
1181 int cgroup_gid;
1182 if (!GetUsernsOutsideId(config->gid_map, config->cgroup_group, &cgroup_gid))
1183 return -1;
Stephen Barber1a398c72017-01-23 12:39:44 -08001184
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001185 c->cgroup = libcontainer::Cgroup::Create(c->name,
1186 base::FilePath("/sys/fs/cgroup"),
1187 config->cgroup_parent,
1188 cgroup_uid,
1189 cgroup_gid);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001190 if (!c->cgroup)
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001191 return -1;
Dylan Reida9966422016-07-21 10:11:34 -07001192
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001193 // Must be root to modify device cgroup or mknod.
1194 std::map<minijail_hook_event_t, std::vector<libcontainer::HookCallback>>
1195 hook_callbacks;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001196 if (getuid() == 0) {
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001197 if (!config->devices.empty()) {
1198 // Create the devices in the mount namespace.
1199 auto it = hook_callbacks.insert(
1200 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1201 std::vector<libcontainer::HookCallback>()));
1202 it.first->second.emplace_back(
1203 libcontainer::AdaptCallbackToRunInNamespaces(
1204 base::Bind(&CreateDeviceNodes, base::Unretained(c),
1205 base::Unretained(config)),
1206 {CLONE_NEWNS}));
1207 }
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001208 if (!DeviceSetup(c, config))
1209 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001210 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001211
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001212 // Potentially run setfiles on mounts configured outside of the jail.
1213 if (!config->run_setfiles.empty()) {
1214 const base::FilePath kDataPath("/data");
1215 const base::FilePath kCachePath("/cache");
1216 std::vector<base::FilePath> destinations;
1217 for (const auto& mnt : config->mounts) {
1218 if (mnt.mount_in_ns)
1219 continue;
1220 if (mnt.flags & MS_RDONLY)
1221 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001222
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001223 // A hack to avoid setfiles on /data and /cache.
1224 if (mnt.destination == kDataPath || mnt.destination == kCachePath)
1225 continue;
Yusuke Sato91f11f02016-12-02 16:15:13 -08001226
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001227 destinations.emplace_back(
1228 GetPathInOuterNamespace(c->runfsroot, mnt.destination));
1229 }
1230
1231 if (!destinations.empty()) {
1232 auto it = hook_callbacks.insert(
1233 std::make_pair(MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1234 std::vector<libcontainer::HookCallback>()));
1235 it.first->second.emplace_back(
1236 libcontainer::AdaptCallbackToRunInNamespaces(
1237 base::Bind(&RunSetfilesCommand, base::Unretained(c),
1238 base::Unretained(config), destinations),
1239 {CLONE_NEWNS}));
1240 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001241 }
Dylan Reidd7229582016-04-27 17:08:40 -07001242
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001243 /* Setup CPU cgroup params. */
1244 if (config->cpu_cgparams.shares) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001245 if (!c->cgroup->SetCpuShares(config->cpu_cgparams.shares))
1246 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001247 }
1248 if (config->cpu_cgparams.period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001249 if (!c->cgroup->SetCpuQuota(config->cpu_cgparams.quota))
1250 return -1;
1251 if (!c->cgroup->SetCpuPeriod(config->cpu_cgparams.period))
1252 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001253 }
1254 if (config->cpu_cgparams.rt_period) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001255 if (!c->cgroup->SetCpuRtRuntime(config->cpu_cgparams.rt_runtime))
1256 return -1;
1257 if (!c->cgroup->SetCpuRtPeriod(config->cpu_cgparams.rt_period))
1258 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001259 }
Chinyue Chenfac909e2016-06-24 14:17:42 +08001260
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001261 /* Setup and start the container with libminijail. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001262 if (!config->pid_file_path.empty())
1263 c->pid_file_path = config->pid_file_path;
1264 else if (!c->runfs.empty())
1265 c->pid_file_path = c->runfs.Append("container.pid");
Keshav Santhanam0e4c3282016-07-14 10:25:16 -07001266
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001267 if (!c->pid_file_path.empty())
1268 minijail_write_pid_file(c->jail.get(), c->pid_file_path.value().c_str());
1269 minijail_reset_signal_mask(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001270
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001271 /* Setup container namespaces. */
Stephen Barber771653f2017-10-04 23:48:57 -07001272 if (container_config_has_namespace(config, "ipc"))
1273 minijail_namespace_ipc(c->jail.get());
1274 if (container_config_has_namespace(config, "mount"))
1275 minijail_namespace_vfs(c->jail.get());
1276 if (container_config_has_namespace(config, "network"))
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001277 minijail_namespace_net(c->jail.get());
Stephen Barber771653f2017-10-04 23:48:57 -07001278 if (container_config_has_namespace(config, "pid"))
1279 minijail_namespace_pids(c->jail.get());
1280
1281 if (container_config_has_namespace(config, "user")) {
1282 minijail_namespace_user(c->jail.get());
1283 if (minijail_uidmap(c->jail.get(), config->uid_map.c_str()) != 0)
1284 return -1;
1285 if (minijail_gidmap(c->jail.get(), config->gid_map.c_str()) != 0)
1286 return -1;
1287 }
1288
1289 if (container_config_has_namespace(config, "cgroup"))
1290 minijail_namespace_cgroups(c->jail.get());
1291
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001292 if (getuid() != 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001293 minijail_namespace_user_disable_setgroups(c->jail.get());
Dylan Reid837c74a2016-01-22 17:25:21 -08001294
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001295 /* Set the UID/GID inside the container if not 0. */
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001296 if (!GetUsernsOutsideId(config->uid_map, config->uid, nullptr))
1297 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001298 else if (config->uid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001299 minijail_change_uid(c->jail.get(), config->uid);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001300 if (!GetUsernsOutsideId(config->gid_map, config->gid, nullptr))
1301 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001302 else if (config->gid > 0)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001303 minijail_change_gid(c->jail.get(), config->gid);
Keshav Santhanam36485ff2016-08-02 16:21:02 -07001304
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001305 if (minijail_enter_pivot_root(c->jail.get(), c->runfsroot.value().c_str()) !=
1306 0) {
1307 return -1;
1308 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001309
Luis Hector Chavez76ae9ac2017-09-20 21:13:08 -07001310 // Add the cgroups configured above.
1311 for (int32_t i = 0; i < libcontainer::Cgroup::Type::NUM_TYPES; i++) {
1312 if (c->cgroup->has_tasks_path(i)) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001313 if (minijail_add_to_cgroup(
1314 c->jail.get(), c->cgroup->tasks_path(i).value().c_str()) != 0) {
1315 return -1;
1316 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001317 }
1318 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001319
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001320 if (!config->alt_syscall_table.empty())
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001321 minijail_use_alt_syscall(c->jail.get(), config->alt_syscall_table.c_str());
Dylan Reid837c74a2016-01-22 17:25:21 -08001322
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001323 for (int i = 0; i < config->num_rlimits; i++) {
Luis Hector Chaveze1062e82017-09-18 09:57:37 -07001324 const Rlimit& lim = config->rlimits[i];
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001325 if (minijail_rlimit(c->jail.get(), lim.type, lim.cur, lim.max) != 0)
1326 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001327 }
Dylan Reid93fa4602017-06-06 13:39:31 -07001328
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001329 if (!config->selinux_context.empty()) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001330 if (minijail_add_hook(c->jail.get(), &Setexeccon,
1331 const_cast<char*>(config->selinux_context.c_str()),
1332 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1333 return -1;
1334 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001335 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001336
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001337 if (config->pre_start_hook) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001338 if (minijail_add_hook(c->jail.get(), config->pre_start_hook,
1339 config->pre_start_hook_payload,
1340 MINIJAIL_HOOK_EVENT_PRE_EXECVE) != 0) {
1341 return -1;
1342 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001343 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001344
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001345 // Now that all pre-requisite hooks are installed, copy the ones in the
1346 // container_config object in the correct order.
1347 for (const auto& config_hook : config->hooks) {
1348 auto it = hook_callbacks.insert(std::make_pair(
1349 config_hook.first, std::vector<libcontainer::HookCallback>()));
1350 it.first->second.insert(it.first->second.end(), config_hook.second.begin(),
1351 config_hook.second.end());
1352 }
1353
1354 c->hook_states.clear();
1355 // Reserve enough memory to hold all the hooks, so that their addresses do not
1356 // get invalidated by reallocation.
1357 c->hook_states.reserve(MINIJAIL_HOOK_EVENT_MAX);
1358 for (minijail_hook_event_t event : {MINIJAIL_HOOK_EVENT_PRE_CHROOT,
1359 MINIJAIL_HOOK_EVENT_PRE_DROP_CAPS,
1360 MINIJAIL_HOOK_EVENT_PRE_EXECVE}) {
1361 const auto& it = hook_callbacks.find(event);
1362 if (it == hook_callbacks.end())
1363 continue;
1364 c->hook_states.emplace_back(
1365 std::make_pair(libcontainer::HookState(), it->second));
1366 if (!c->hook_states.back().first.InstallHook(c->jail.get(), event))
1367 return -1;
1368 }
1369
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001370 for (int fd : config->inherited_fds) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001371 if (minijail_preserve_fd(c->jail.get(), fd, fd) != 0)
1372 return -1;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001373 }
Luis Hector Chavezf8e8f4c2017-08-01 01:09:39 -07001374
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001375 /* TODO(dgreid) - remove this once shared mounts are cleaned up. */
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001376 minijail_skip_remount_private(c->jail.get());
Dylan Reid3da683b2016-04-05 03:35:35 -07001377
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001378 if (!config->keep_fds_open)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001379 minijail_close_open_fds(c->jail.get());
Luis Hector Chaveze18e7d42016-10-12 07:35:32 -07001380
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001381 if (config->use_capmask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001382 minijail_use_caps(c->jail.get(), config->capmask);
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001383 if (config->use_capmask_ambient)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001384 minijail_set_ambient_caps(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001385 if (config->securebits_skip_mask) {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001386 minijail_skip_setting_securebits(c->jail.get(),
1387 config->securebits_skip_mask);
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001388 }
1389 }
Luis Hector Chavezff5978f2017-06-27 12:52:58 -07001390
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001391 if (!config->do_init)
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001392 minijail_run_as_init(c->jail.get());
Luis Hector Chavezdac65c32017-07-21 10:30:23 -07001393
Luis Hector Chavez9cde12a2017-09-18 10:53:38 -07001394 std::vector<char*> argv_cstr;
1395 argv_cstr.reserve(config->program_argv.size() + 1);
1396 for (const auto& arg : config->program_argv)
1397 argv_cstr.emplace_back(const_cast<char*>(arg.c_str()));
1398 argv_cstr.emplace_back(nullptr);
1399
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001400 if (minijail_run_pid_pipes_no_preload(c->jail.get(), argv_cstr[0],
1401 argv_cstr.data(), &c->init_pid, nullptr,
1402 nullptr, nullptr) != 0) {
1403 return -1;
1404 }
Dylan Reid837c74a2016-01-22 17:25:21 -08001405
Luis Hector Chavez644d2042017-09-19 18:56:44 -07001406 // |hook_states| is already sorted in the correct order.
1407 for (auto& hook_state : c->hook_states) {
1408 if (!hook_state.first.WaitForHookAndRun(hook_state.second, c->init_pid))
1409 return -1;
1410 }
1411
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001412 // The container has started successfully, no need to tear it down anymore.
1413 ignore_result(teardown.Release());
1414 return 0;
Dylan Reid837c74a2016-01-22 17:25:21 -08001415}
1416
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001417const char* container_root(struct container* c) {
Luis Hector Chavez5381d002017-09-16 12:54:24 -07001418 return c->runfs.value().c_str();
Dylan Reid837c74a2016-01-22 17:25:21 -08001419}
1420
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001421int container_pid(struct container* c) {
1422 return c->init_pid;
Dylan Reid837c74a2016-01-22 17:25:21 -08001423}
1424
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001425int container_wait(struct container* c) {
1426 int rc;
Dylan Reidcf745c52016-04-22 10:18:03 -07001427
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001428 do {
Luis Hector Chavez626f5c82017-09-18 11:19:32 -07001429 rc = minijail_wait(c->jail.get());
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001430 } while (rc == -EINTR);
Dylan Reidcf745c52016-04-22 10:18:03 -07001431
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001432 // If the process had already been reaped, still perform teardown.
1433 if (rc == -ECHILD || rc >= 0) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001434 if (!ContainerTeardown(c))
1435 rc = -errno;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001436 }
1437 return rc;
Dylan Reid837c74a2016-01-22 17:25:21 -08001438}
1439
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001440int container_kill(struct container* c) {
Luis Hector Chavez1f7e60c2017-09-27 22:03:48 -07001441 if (kill(c->init_pid, SIGKILL) != 0 && errno != ESRCH) {
Luis Hector Chavezdc61f8d2017-10-02 11:12:46 -07001442 PLOG(ERROR) << "Failed to kill " << c->init_pid;
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001443 return -errno;
Luis Hector Chavez835d39e2017-09-19 15:16:31 -07001444 }
Luis Hector Chavez31735bc2017-09-15 08:17:10 -07001445 return container_wait(c);
Dylan Reid837c74a2016-01-22 17:25:21 -08001446}