blob: 1fe2236a7050d927cc1b199a9575dd450e123475 [file] [log] [blame]
Luis Hector Chaveze09de302017-09-07 16:32:44 -07001// Copyright 2017 The Chromium OS Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "run_oci/run_oci_utils.h"
6
Luis Hector Chavez5bc7bb72017-10-27 11:29:37 -07007#include <fcntl.h>
Luis Hector Chaveze09de302017-09-07 16:32:44 -07008#include <mntent.h>
9#include <stdio.h>
Luis Hector Chavez60f9bb12017-10-16 11:33:01 -070010#include <sys/capability.h>
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070011#include <sys/epoll.h>
Luis Hector Chavezb77da862018-02-21 18:20:03 -080012#include <sys/mount.h>
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070013#include <sys/signal.h>
Luis Hector Chavez5bc7bb72017-10-27 11:29:37 -070014#include <sys/stat.h>
Yusuke Satoc7661a82020-05-04 13:21:17 -070015#include <sys/statvfs.h>
16#include <sys/types.h>
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070017#include <sys/wait.h>
Luis Hector Chavez5bc7bb72017-10-27 11:29:37 -070018#include <unistd.h>
Luis Hector Chaveze09de302017-09-07 16:32:44 -070019
Luis Hector Chavez60f9bb12017-10-16 11:33:01 -070020#include <type_traits>
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070021#include <utility>
Luis Hector Chaveze09de302017-09-07 16:32:44 -070022
23#include <base/files/file_util.h>
Qijiang Fane19d67d2020-04-01 08:18:39 +090024#include <base/stl_util.h>
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070025#include <base/strings/string_piece.h>
Luis Hector Chaveze09de302017-09-07 16:32:44 -070026#include <base/strings/string_split.h>
Luis Hector Chavezb77da862018-02-21 18:20:03 -080027#include <base/strings/string_util.h>
Yusuke Satoc7661a82020-05-04 13:21:17 -070028#include <brillo/key_value_store.h>
Luis Hector Chavez5bc7bb72017-10-27 11:29:37 -070029#include <brillo/syslog_logging.h>
Luis Hector Chavezb9f09a52018-04-18 12:39:56 -070030#include <libminijail.h>
Luis Hector Chavezb77da862018-02-21 18:20:03 -080031#include <libmount/libmount.h>
Luis Hector Chaveze09de302017-09-07 16:32:44 -070032
// syslog(3) is declared by hand here instead of including syslog.h, because
// that header interacts badly with base::logging.
extern "C" {
void syslog(int priority, const char* format, ...);
}
35
Luis Hector Chaveze09de302017-09-07 16:32:44 -070036namespace run_oci {
37
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070038namespace {
39
// Numeric syslog priorities, spelled out by hand. The LOG_* names are avoided
// because they interact badly with base::logging, which re-defines LOG_* and
// causes all sorts of confusion.
constexpr int kSyslogLogInfoPriority{6};
constexpr int kSyslogLogWarningPriority{4};
44
45// Creates a pipe where the read end of it is made to be close-on-exec and the
46// write end of it is associated with one of the well-known stdio FDs (e.g.
47// STDOUT_FILENO/STDERR_FILENO).
48bool CreateStdioPipe(base::ScopedFD* pipe_read_fd, int stdio_fd) {
49 base::ScopedFD pipe_write_fd;
50
51 if (!Pipe(pipe_read_fd, &pipe_write_fd, O_CLOEXEC)) {
52 PLOG(ERROR) << "Failed to create pipe for " << stdio_fd;
53 return false;
54 }
55
56 if (pipe_write_fd.get() == stdio_fd) {
57 // The write fd is already the correct fd number, but it needs to have the
58 // close-on-exec flag cleared.
59 if (fcntl(pipe_write_fd.get(), F_SETFD, 0) == -1) {
60 PLOG(ERROR) << "Failed to set FD_CLOEXEC on read end of pipe for "
61 << stdio_fd;
62 return false;
63 }
64 // Finally, release it so that it is not closed upon returning.
65 ignore_result(pipe_write_fd.release());
66 } else {
67 if (dup2(pipe_write_fd.get(), stdio_fd) == -1) {
68 PLOG(ERROR) << "Failed to redirect stdio for " << stdio_fd;
69 return false;
70 }
71 }
72
73 return true;
74}
75
Yusuke Satoc7661a82020-05-04 13:21:17 -070076bool IsTestImage() {
77 brillo::KeyValueStore store;
78 std::string channel;
79 if (!store.Load(base::FilePath("/etc/lsb-release"))) {
80 LOG(WARNING) << "Failed to parse /etc/lsb-release, assuming non-test image";
81 return false;
82 }
83
84 if (!store.GetString("CHROMEOS_RELEASE_TRACK", &channel)) {
85 LOG(WARNING) << "Couldn't find release track an /etc/lsb-release, assuming "
86 "non-test image";
87 return false;
88 }
89
90 return base::StartsWith(channel, "test", base::CompareCase::SENSITIVE);
91}
92
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -070093} // namespace
94
95SyslogStdioAdapter::SyslogStdioAdapter(base::Process child)
96 : child_(std::move(child)) {}
97
98SyslogStdioAdapter::~SyslogStdioAdapter() {
99 if (!child_.Terminate(0 /* exit_code */, true /* wait */))
100 LOG(ERROR) << "Failed to terminate logger process";
101}
102
103std::unique_ptr<SyslogStdioAdapter> SyslogStdioAdapter::Create() {
104 base::ScopedFD stdout_pipe_read_fd, stderr_pipe_read_fd;
105
106 if (!CreateStdioPipe(&stdout_pipe_read_fd, STDOUT_FILENO))
107 return nullptr;
108 if (!CreateStdioPipe(&stderr_pipe_read_fd, STDERR_FILENO))
109 return nullptr;
110
111 // Redirect all minijail logs to avoid them appearing in multiple places.
112 minijail_log_to_fd(STDOUT_FILENO, kSyslogLogInfoPriority);
113
114 brillo::SetLogFlags(brillo::kLogToSyslog | brillo::kLogHeader);
115 logging::SetLogItems(false /* pid */, false /* tid */, false /* timestamp */,
116 false /* tick_count */);
117
118 pid_t child = fork();
119 if (child == -1) {
120 PLOG(ERROR) << "Failed to fork";
121 return nullptr;
122 }
123
124 if (child == 0) {
125 close(STDOUT_FILENO);
126 close(STDERR_FILENO);
127 SyslogStdioAdapter::RunLoop(std::move(stdout_pipe_read_fd),
128 std::move(stderr_pipe_read_fd));
129 _exit(1);
130 }
131
132 return std::unique_ptr<SyslogStdioAdapter>(
133 new SyslogStdioAdapter(base::Process(child)));
134}
135
136// static
137void SyslogStdioAdapter::RunLoop(base::ScopedFD stdout_fd,
138 base::ScopedFD stderr_fd) {
139 base::ScopedFD epollfd(epoll_create(1 /*arbitrary, ignored by kernel*/));
140 if (!epollfd.is_valid()) {
141 PLOG(ERROR) << "Failed to open epoll fd";
142 return;
143 }
144
145 struct EpollDescriptor {
146 base::ScopedFD* fd;
147 const char* name;
148 int priority;
149 } epoll_descriptors[2] = {{&stdout_fd, "stdout", kSyslogLogInfoPriority},
150 {&stderr_fd, "stderr", kSyslogLogWarningPriority}};
151 for (auto& descriptor : epoll_descriptors) {
152 struct epoll_event ev;
153 ev.events = EPOLLIN;
154 ev.data.ptr = &descriptor;
155 if (epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, descriptor.fd->get(), &ev) ==
156 -1) {
157 PLOG(ERROR) << "Failed to register " << descriptor.name;
158 return;
159 }
160 }
161
162 char buffer[4096];
Qijiang Fane19d67d2020-04-01 08:18:39 +0900163 struct epoll_event events[base::size(epoll_descriptors)];
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -0700164 while (true) {
165 int nfds =
Qijiang Fane19d67d2020-04-01 08:18:39 +0900166 HANDLE_EINTR(epoll_wait(epollfd.get(), events, base::size(events), -1));
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -0700167 if (nfds == -1) {
168 PLOG(ERROR) << "Failed to epoll_wait";
169 return;
170 }
171
172 for (int i = 0; i < nfds; i++) {
173 EpollDescriptor* descriptor =
174 reinterpret_cast<EpollDescriptor*>(events[i].data.ptr);
175 ssize_t bytes =
176 HANDLE_EINTR(read(descriptor->fd->get(), buffer, sizeof(buffer)));
177 if (bytes <= 0) {
178 PLOG(ERROR) << "Failed to read from " << descriptor->name;
179 epoll_ctl(epollfd.get(), EPOLL_CTL_DEL, descriptor->fd->get(), nullptr);
180 descriptor->fd->reset();
181 continue;
182 }
183 if (bytes == 0) {
184 LOG(ERROR) << descriptor->name << " was closed";
185 epoll_ctl(epollfd.get(), EPOLL_CTL_DEL, descriptor->fd->get(), nullptr);
186 descriptor->fd->reset();
187 continue;
188 }
189
190 // This assumes that the writer's output is buffered and flushed on a
191 // line-by-line basis. This is true in practice and requires much simpler
192 // code, but may lead to lines that straddle a buffer size or partial
193 // lines that are output using raw write(2) syscalls being split across
194 // two read(2) syscalls.
195 base::StringPiece lines(buffer, bytes);
196 for (const auto& line :
197 base::SplitString(lines.as_string(), "\n", base::KEEP_WHITESPACE,
198 base::SPLIT_WANT_NONEMPTY)) {
199 syslog(descriptor->priority, "[%s] %s", descriptor->name, line.data());
200 }
201 }
202 }
203}
204
Luis Hector Chavezb77da862018-02-21 18:20:03 -0800205bool Mountpoint::operator==(const Mountpoint& other) const {
206 return path == other.path && mountflags == other.mountflags &&
207 data_string == other.data_string;
208}
209
210std::string ParseMountOptions(const std::vector<std::string>& options,
211 int* mount_flags_out,
212 int* negated_mount_flags_out,
213 int* bind_flags_out,
214 int* mount_propagation_flags_out,
215 bool* loopback_out,
216 std::string* verity_options) {
217 std::string option_string_out;
218 *mount_flags_out = 0;
219 *negated_mount_flags_out = 0;
220 *bind_flags_out = 0;
221 *mount_propagation_flags_out = 0;
222 *loopback_out = false;
223
224 const struct libmnt_optmap* linux_option_map =
225 mnt_get_builtin_optmap(MNT_LINUX_MAP);
226
227 constexpr int kMountPropagationFlagsMask =
228 MS_PRIVATE | MS_SLAVE | MS_SHARED | MS_UNBINDABLE;
229
230 for (const auto& option : options) {
231 const struct libmnt_optmap* map_entry = nullptr;
232
233 for (const struct libmnt_optmap* it = linux_option_map; it->name; ++it) {
234 if (option == it->name && it->id) {
235 map_entry = it;
236 break;
237 }
238 }
239
240 if (map_entry) {
241 // This is a known flag name.
242 if (map_entry->id & MS_BIND) {
243 *bind_flags_out |= map_entry->id;
244 } else if (map_entry->id & kMountPropagationFlagsMask) {
245 *mount_propagation_flags_out |= map_entry->id;
246 } else if (map_entry->mask & MNT_INVERT) {
247 *negated_mount_flags_out |= map_entry->id;
248 } else {
249 *mount_flags_out |= map_entry->id;
250 }
251 } else if (option == "loop") {
252 *loopback_out = true;
253 } else if (base::StartsWith(option, "dm=", base::CompareCase::SENSITIVE)) {
254 *verity_options = option.substr(3, std::string::npos);
255 } else {
256 // Unknown options get appended to the string passed to mount data.
257 if (!option_string_out.empty())
258 option_string_out += ",";
259 option_string_out += option;
260 }
261 }
262
263 return option_string_out;
264}
265
266std::vector<Mountpoint> GetMountpointsUnder(
Luis Hector Chaveze09de302017-09-07 16:32:44 -0700267 const base::FilePath& root, const base::FilePath& procSelfMountsPath) {
268 base::ScopedFILE mountinfo(fopen(procSelfMountsPath.value().c_str(), "r"));
269 if (!mountinfo) {
270 PLOG(ERROR) << "Failed to open " << procSelfMountsPath.value();
Luis Hector Chavezb77da862018-02-21 18:20:03 -0800271 return std::vector<Mountpoint>();
Luis Hector Chaveze09de302017-09-07 16:32:44 -0700272 }
273
274 struct mntent mount_entry;
275
276 std::string line;
277 char buffer[1024];
Luis Hector Chavezb77da862018-02-21 18:20:03 -0800278 std::vector<Mountpoint> mountpoints;
Luis Hector Chaveze09de302017-09-07 16:32:44 -0700279 while (getmntent_r(mountinfo.get(), &mount_entry, buffer, sizeof(buffer))) {
280 // Only return paths that are under |root|.
Luis Hector Chavezb77da862018-02-21 18:20:03 -0800281 const std::string path = mount_entry.mnt_dir;
282 if (path.compare(0, root.value().size(), root.value()) != 0)
Luis Hector Chaveze09de302017-09-07 16:32:44 -0700283 continue;
Luis Hector Chavezb77da862018-02-21 18:20:03 -0800284
285 int mount_flags, negated_mount_flags, bind_mount_flags,
286 mount_propagation_flags;
287 bool loopback;
288 std::string verity_options;
289 std::string options = ParseMountOptions(
290 base::SplitString(mount_entry.mnt_opts, ",", base::TRIM_WHITESPACE,
291 base::SPLIT_WANT_NONEMPTY),
292 &mount_flags, &negated_mount_flags, &bind_mount_flags,
293 &mount_propagation_flags, &loopback, &verity_options);
294 mountpoints.emplace_back(
295 Mountpoint{base::FilePath(path), mount_flags, options});
Luis Hector Chaveze09de302017-09-07 16:32:44 -0700296 }
297
298 return mountpoints;
299}
300
Luis Hector Chavez60f9bb12017-10-16 11:33:01 -0700301bool HasCapSysAdmin() {
302 if (!CAP_IS_SUPPORTED(CAP_SYS_ADMIN))
303 return false;
304
305 std::unique_ptr<std::remove_pointer_t<cap_t>, decltype(&cap_free)> caps(
306 cap_get_proc(), &cap_free);
307 if (!caps) {
308 PLOG(ERROR) << "Failed to get process' capabilities";
309 return false;
310 }
311
312 cap_flag_value_t cap_value;
313 if (cap_get_flag(caps.get(), CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_value) != 0) {
314 PLOG(ERROR) << "Failed to get the value of CAP_SYS_ADMIN";
315 return false;
316 }
317 return cap_value == CAP_SET;
318}
319
Luis Hector Chavez5bc7bb72017-10-27 11:29:37 -0700320bool RedirectLoggingAndStdio(const base::FilePath& log_file) {
321 base::ScopedFD log_fd(HANDLE_EINTR(
322 open(log_file.value().c_str(), O_CREAT | O_WRONLY | O_APPEND, 0644)));
323 if (!log_fd.is_valid()) {
324 PLOG(ERROR) << "Failed to open log file '" << log_file.value() << "'";
325 return false;
326 }
327 // Redirecting stdout/stderr for the hooks' benefit.
328 if (dup2(log_fd.get(), STDOUT_FILENO) == -1) {
329 PLOG(ERROR) << "Failed to redirect stdout";
330 return false;
331 }
332 if (dup2(log_fd.get(), STDERR_FILENO) == -1) {
333 PLOG(ERROR) << "Failed to redirect stderr";
334 return false;
335 }
Luis Hector Chavezb9f09a52018-04-18 12:39:56 -0700336 // Redirect all minijail logs to make them easier to find.
Luis Hector Chavezb9f09a52018-04-18 12:39:56 -0700337 minijail_log_to_fd(STDERR_FILENO, kSyslogLogInfoPriority);
338
Luis Hector Chavez5bc7bb72017-10-27 11:29:37 -0700339 brillo::SetLogFlags(brillo::kLogHeader | brillo::kLogToStderr);
340 logging::SetLogItems(true /* pid */, false /* tid */, true /* timestamp */,
341 false /* tick_count */);
342 return true;
343}
344
Luis Hector Chavezb2c3b3b2018-08-01 13:35:40 -0700345bool Pipe(base::ScopedFD* read_fd, base::ScopedFD* write_fd, int flags) {
346 int pipe_fds[2];
347 if (HANDLE_EINTR(pipe2(pipe_fds, flags)) != 0)
348 return false;
349 read_fd->reset(pipe_fds[0]);
350 write_fd->reset(pipe_fds[1]);
351 return true;
352}
353
Qijiang Fan20014ba2020-05-30 17:55:16 +0900354brillo::SafeFD OpenOciConfigSafelyInternal(const base::FilePath& config_path,
355 bool enable_noexec_check) {
Yusuke Satoc7661a82020-05-04 13:21:17 -0700356 brillo::SafeFD::SafeFDResult result(
357 brillo::SafeFD::Root().first.OpenExistingFile(config_path,
358 O_RDONLY | O_CLOEXEC));
359 if (brillo::SafeFD::IsError(result.second)) {
360 LOG(ERROR) << "Failed to open " << config_path.value() << " with error "
361 << static_cast<int>(result.second);
362 return brillo::SafeFD();
363 }
364
365 brillo::SafeFD fd(std::move(result.first));
366 struct statvfs buf;
367 if (HANDLE_EINTR(fstatvfs(fd.get(), &buf)) < 0) {
368 PLOG(ERROR) << "Failed to statvfs container config: "
369 << config_path.value();
370 return brillo::SafeFD();
371 }
372
Qijiang Fan20014ba2020-05-30 17:55:16 +0900373 if (enable_noexec_check && (buf.f_flag & ST_NOEXEC)) {
Yusuke Satoc7661a82020-05-04 13:21:17 -0700374 LOG(ERROR) << config_path.value() << " is on a noexec filesystem";
375 errno = EPERM;
376 return brillo::SafeFD();
377 }
378 return fd;
379}
380
Qijiang Fan20014ba2020-05-30 17:55:16 +0900381brillo::SafeFD OpenOciConfigSafely(const base::FilePath& config_path) {
382 // Don't check the flag on a test image. security.RunOCI relies on configs on
383 // a writable partition.
384 return OpenOciConfigSafelyInternal(config_path,
385 !IsTestImage() /* enable_noexec_check */);
386}
387
388brillo::SafeFD OpenOciConfigSafelyForTest(const base::FilePath& config_path,
389 bool enable_noexec_check) {
390 return OpenOciConfigSafelyInternal(config_path, enable_noexec_check);
391}
392
Luis Hector Chaveze09de302017-09-07 16:32:44 -0700393} // namespace run_oci