/*
 * QEMU seccomp mode 2 support with libseccomp
 *
 * Copyright IBM, Corp. 2012
 *
 * Authors:
 *  Eduardo Otubo    <eotubo@br.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
Peter Maydell | d38ea87 | 2016-01-29 17:50:05 +0000 | [diff] [blame] | 15 | #include "qemu/osdep.h" |
Yi Min Zhao | 9d0fdec | 2018-05-31 11:29:37 +0800 | [diff] [blame] | 16 | #include "qemu/config-file.h" |
| 17 | #include "qemu/option.h" |
| 18 | #include "qemu/module.h" |
| 19 | #include "qemu/error-report.h" |
| 20 | #include <sys/prctl.h> |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 21 | #include <seccomp.h> |
Paolo Bonzini | 9c17d61 | 2012-12-17 18:20:04 +0100 | [diff] [blame] | 22 | #include "sysemu/seccomp.h" |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 23 | |
/* For some architectures (notably ARM) cacheflush is not supported until
 * libseccomp 2.2.3, but configure enforces that we are using a more recent
 * version on those hosts, so it is OK for this check to be less strict.
 */
Andrew Jones | 47d2067 | 2015-11-02 23:53:26 +0100 | [diff] [blame] | 28 | #if SCMP_VER_MAJOR >= 3 |
| 29 | #define HAVE_CACHEFLUSH |
James Hogan | 81bed73 | 2016-04-08 14:16:33 +0100 | [diff] [blame] | 30 | #elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 2 |
Andrew Jones | 47d2067 | 2015-11-02 23:53:26 +0100 | [diff] [blame] | 31 | #define HAVE_CACHEFLUSH |
| 32 | #endif |
| 33 | |
/*
 * One entry of the syscall blacklist.
 *
 * The field order is significant: the blacklist table below initialises
 * entries positionally.
 */
struct QemuSeccompSyscall {
    /* Syscall number, as produced by SCMP_SYS() in the blacklist table. */
    int32_t num;
    /* QEMU_SECCOMP_SET_* group; tested against the requested option mask. */
    uint8_t set;
    /* Number of elements in arg_cmp (0 when no argument filter is given). */
    uint8_t narg;
    /* Optional argument comparisons handed to seccomp_rule_add_array(). */
    const struct scmp_arg_cmp *arg_cmp;
};
| 40 | |
| 41 | const struct scmp_arg_cmp sched_setscheduler_arg[] = { |
| 42 | SCMP_A1(SCMP_CMP_NE, SCHED_IDLE) |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 43 | }; |
| 44 | |
Eduardo Otubo | 1bd6152 | 2017-02-28 21:13:12 +0100 | [diff] [blame] | 45 | static const struct QemuSeccompSyscall blacklist[] = { |
| 46 | /* default set of syscalls to blacklist */ |
| 47 | { SCMP_SYS(reboot), QEMU_SECCOMP_SET_DEFAULT }, |
| 48 | { SCMP_SYS(swapon), QEMU_SECCOMP_SET_DEFAULT }, |
| 49 | { SCMP_SYS(swapoff), QEMU_SECCOMP_SET_DEFAULT }, |
| 50 | { SCMP_SYS(syslog), QEMU_SECCOMP_SET_DEFAULT }, |
| 51 | { SCMP_SYS(mount), QEMU_SECCOMP_SET_DEFAULT }, |
| 52 | { SCMP_SYS(umount), QEMU_SECCOMP_SET_DEFAULT }, |
| 53 | { SCMP_SYS(kexec_load), QEMU_SECCOMP_SET_DEFAULT }, |
| 54 | { SCMP_SYS(afs_syscall), QEMU_SECCOMP_SET_DEFAULT }, |
| 55 | { SCMP_SYS(break), QEMU_SECCOMP_SET_DEFAULT }, |
| 56 | { SCMP_SYS(ftime), QEMU_SECCOMP_SET_DEFAULT }, |
| 57 | { SCMP_SYS(getpmsg), QEMU_SECCOMP_SET_DEFAULT }, |
| 58 | { SCMP_SYS(gtty), QEMU_SECCOMP_SET_DEFAULT }, |
| 59 | { SCMP_SYS(lock), QEMU_SECCOMP_SET_DEFAULT }, |
| 60 | { SCMP_SYS(mpx), QEMU_SECCOMP_SET_DEFAULT }, |
| 61 | { SCMP_SYS(prof), QEMU_SECCOMP_SET_DEFAULT }, |
| 62 | { SCMP_SYS(profil), QEMU_SECCOMP_SET_DEFAULT }, |
| 63 | { SCMP_SYS(putpmsg), QEMU_SECCOMP_SET_DEFAULT }, |
| 64 | { SCMP_SYS(security), QEMU_SECCOMP_SET_DEFAULT }, |
| 65 | { SCMP_SYS(stty), QEMU_SECCOMP_SET_DEFAULT }, |
| 66 | { SCMP_SYS(tuxcall), QEMU_SECCOMP_SET_DEFAULT }, |
| 67 | { SCMP_SYS(ulimit), QEMU_SECCOMP_SET_DEFAULT }, |
| 68 | { SCMP_SYS(vserver), QEMU_SECCOMP_SET_DEFAULT }, |
Eduardo Otubo | 2b716fa | 2017-03-01 23:17:29 +0100 | [diff] [blame] | 69 | /* obsolete */ |
| 70 | { SCMP_SYS(readdir), QEMU_SECCOMP_SET_OBSOLETE }, |
| 71 | { SCMP_SYS(_sysctl), QEMU_SECCOMP_SET_OBSOLETE }, |
| 72 | { SCMP_SYS(bdflush), QEMU_SECCOMP_SET_OBSOLETE }, |
| 73 | { SCMP_SYS(create_module), QEMU_SECCOMP_SET_OBSOLETE }, |
| 74 | { SCMP_SYS(get_kernel_syms), QEMU_SECCOMP_SET_OBSOLETE }, |
| 75 | { SCMP_SYS(query_module), QEMU_SECCOMP_SET_OBSOLETE }, |
| 76 | { SCMP_SYS(sgetmask), QEMU_SECCOMP_SET_OBSOLETE }, |
| 77 | { SCMP_SYS(ssetmask), QEMU_SECCOMP_SET_OBSOLETE }, |
| 78 | { SCMP_SYS(sysfs), QEMU_SECCOMP_SET_OBSOLETE }, |
| 79 | { SCMP_SYS(uselib), QEMU_SECCOMP_SET_OBSOLETE }, |
| 80 | { SCMP_SYS(ustat), QEMU_SECCOMP_SET_OBSOLETE }, |
Eduardo Otubo | 73a1e64 | 2017-03-13 22:13:27 +0100 | [diff] [blame] | 81 | /* privileged */ |
| 82 | { SCMP_SYS(setuid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 83 | { SCMP_SYS(setgid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 84 | { SCMP_SYS(setpgid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 85 | { SCMP_SYS(setsid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 86 | { SCMP_SYS(setreuid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 87 | { SCMP_SYS(setregid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 88 | { SCMP_SYS(setresuid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 89 | { SCMP_SYS(setresgid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 90 | { SCMP_SYS(setfsuid), QEMU_SECCOMP_SET_PRIVILEGED }, |
| 91 | { SCMP_SYS(setfsgid), QEMU_SECCOMP_SET_PRIVILEGED }, |
Eduardo Otubo | 995a226 | 2017-03-13 22:16:01 +0100 | [diff] [blame] | 92 | /* spawn */ |
| 93 | { SCMP_SYS(fork), QEMU_SECCOMP_SET_SPAWN }, |
| 94 | { SCMP_SYS(vfork), QEMU_SECCOMP_SET_SPAWN }, |
| 95 | { SCMP_SYS(execve), QEMU_SECCOMP_SET_SPAWN }, |
Eduardo Otubo | 24f8cdc | 2017-03-13 22:18:51 +0100 | [diff] [blame] | 96 | /* resource control */ |
| 97 | { SCMP_SYS(getpriority), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 98 | { SCMP_SYS(setpriority), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 99 | { SCMP_SYS(sched_setparam), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 100 | { SCMP_SYS(sched_getparam), QEMU_SECCOMP_SET_RESOURCECTL }, |
Marc-André Lureau | 056de1e | 2018-07-10 16:55:57 +0200 | [diff] [blame] | 101 | { SCMP_SYS(sched_setscheduler), QEMU_SECCOMP_SET_RESOURCECTL, |
| 102 | ARRAY_SIZE(sched_setscheduler_arg), sched_setscheduler_arg }, |
Eduardo Otubo | 24f8cdc | 2017-03-13 22:18:51 +0100 | [diff] [blame] | 103 | { SCMP_SYS(sched_getscheduler), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 104 | { SCMP_SYS(sched_setaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 105 | { SCMP_SYS(sched_getaffinity), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 106 | { SCMP_SYS(sched_get_priority_max), QEMU_SECCOMP_SET_RESOURCECTL }, |
| 107 | { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 108 | }; |
| 109 | |
Eduardo Otubo | 2b716fa | 2017-03-01 23:17:29 +0100 | [diff] [blame] | 110 | |
Yi Min Zhao | 9d0fdec | 2018-05-31 11:29:37 +0800 | [diff] [blame] | 111 | static int seccomp_start(uint32_t seccomp_opts) |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 112 | { |
| 113 | int rc = 0; |
| 114 | unsigned int i = 0; |
| 115 | scmp_filter_ctx ctx; |
| 116 | |
Eduardo Otubo | 1bd6152 | 2017-02-28 21:13:12 +0100 | [diff] [blame] | 117 | ctx = seccomp_init(SCMP_ACT_ALLOW); |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 118 | if (ctx == NULL) { |
Corey Bryant | 2a13f99 | 2013-12-18 11:48:11 -0500 | [diff] [blame] | 119 | rc = -1; |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 120 | goto seccomp_return; |
| 121 | } |
| 122 | |
Eduardo Otubo | 1bd6152 | 2017-02-28 21:13:12 +0100 | [diff] [blame] | 123 | for (i = 0; i < ARRAY_SIZE(blacklist); i++) { |
Eduardo Otubo | 2b716fa | 2017-03-01 23:17:29 +0100 | [diff] [blame] | 124 | if (!(seccomp_opts & blacklist[i].set)) { |
| 125 | continue; |
| 126 | } |
| 127 | |
Marc-André Lureau | 6f2231e | 2018-08-22 19:02:47 +0200 | [diff] [blame^] | 128 | rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, |
Marc-André Lureau | 056de1e | 2018-07-10 16:55:57 +0200 | [diff] [blame] | 129 | blacklist[i].narg, blacklist[i].arg_cmp); |
Eduardo Otubo | 2f668be | 2012-08-14 18:44:06 -0300 | [diff] [blame] | 130 | if (rc < 0) { |
| 131 | goto seccomp_return; |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | rc = seccomp_load(ctx); |
| 136 | |
| 137 | seccomp_return: |
| 138 | seccomp_release(ctx); |
| 139 | return rc; |
| 140 | } |
Yi Min Zhao | 9d0fdec | 2018-05-31 11:29:37 +0800 | [diff] [blame] | 141 | |
| 142 | #ifdef CONFIG_SECCOMP |
| 143 | int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) |
| 144 | { |
| 145 | if (qemu_opt_get_bool(opts, "enable", false)) { |
| 146 | uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT |
| 147 | | QEMU_SECCOMP_SET_OBSOLETE; |
| 148 | const char *value = NULL; |
| 149 | |
| 150 | value = qemu_opt_get(opts, "obsolete"); |
| 151 | if (value) { |
| 152 | if (g_str_equal(value, "allow")) { |
| 153 | seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE; |
| 154 | } else if (g_str_equal(value, "deny")) { |
| 155 | /* this is the default option, this if is here |
| 156 | * to provide a little bit of consistency for |
| 157 | * the command line */ |
| 158 | } else { |
| 159 | error_report("invalid argument for obsolete"); |
| 160 | return -1; |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | value = qemu_opt_get(opts, "elevateprivileges"); |
| 165 | if (value) { |
| 166 | if (g_str_equal(value, "deny")) { |
| 167 | seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; |
| 168 | } else if (g_str_equal(value, "children")) { |
| 169 | seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED; |
| 170 | |
| 171 | /* calling prctl directly because we're |
| 172 | * not sure if host has CAP_SYS_ADMIN set*/ |
| 173 | if (prctl(PR_SET_NO_NEW_PRIVS, 1)) { |
| 174 | error_report("failed to set no_new_privs " |
| 175 | "aborting"); |
| 176 | return -1; |
| 177 | } |
| 178 | } else if (g_str_equal(value, "allow")) { |
| 179 | /* default value */ |
| 180 | } else { |
| 181 | error_report("invalid argument for elevateprivileges"); |
| 182 | return -1; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | value = qemu_opt_get(opts, "spawn"); |
| 187 | if (value) { |
| 188 | if (g_str_equal(value, "deny")) { |
| 189 | seccomp_opts |= QEMU_SECCOMP_SET_SPAWN; |
| 190 | } else if (g_str_equal(value, "allow")) { |
| 191 | /* default value */ |
| 192 | } else { |
| 193 | error_report("invalid argument for spawn"); |
| 194 | return -1; |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | value = qemu_opt_get(opts, "resourcecontrol"); |
| 199 | if (value) { |
| 200 | if (g_str_equal(value, "deny")) { |
| 201 | seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL; |
| 202 | } else if (g_str_equal(value, "allow")) { |
| 203 | /* default value */ |
| 204 | } else { |
| 205 | error_report("invalid argument for resourcecontrol"); |
| 206 | return -1; |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | if (seccomp_start(seccomp_opts) < 0) { |
| 211 | error_report("failed to install seccomp syscall filter " |
| 212 | "in the kernel"); |
| 213 | return -1; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | return 0; |
| 218 | } |
| 219 | |
| 220 | static QemuOptsList qemu_sandbox_opts = { |
| 221 | .name = "sandbox", |
| 222 | .implied_opt_name = "enable", |
| 223 | .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head), |
| 224 | .desc = { |
| 225 | { |
| 226 | .name = "enable", |
| 227 | .type = QEMU_OPT_BOOL, |
| 228 | }, |
| 229 | { |
| 230 | .name = "obsolete", |
| 231 | .type = QEMU_OPT_STRING, |
| 232 | }, |
| 233 | { |
| 234 | .name = "elevateprivileges", |
| 235 | .type = QEMU_OPT_STRING, |
| 236 | }, |
| 237 | { |
| 238 | .name = "spawn", |
| 239 | .type = QEMU_OPT_STRING, |
| 240 | }, |
| 241 | { |
| 242 | .name = "resourcecontrol", |
| 243 | .type = QEMU_OPT_STRING, |
| 244 | }, |
| 245 | { /* end of list */ } |
| 246 | }, |
| 247 | }; |
| 248 | |
| 249 | static void seccomp_register(void) |
| 250 | { |
| 251 | qemu_add_opts(&qemu_sandbox_opts); |
| 252 | } |
| 253 | opts_init(seccomp_register); |
| 254 | #endif |