/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>

static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static char sme_cmdline_off[] __initdata = "off";

/*
 * Since SME-related variables are set early in the boot process, they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(.data) = 0;
EXPORT_SYMBOL_GPL(sme_me_mask);
DEFINE_STATIC_KEY_FALSE(sev_enable_key);
EXPORT_SYMBOL_GPL(sev_enable_key);

static bool sev_enabled __section(.data);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
				       unsigned long size, bool enc)
{
	void *src, *dst;
	size_t len;

	if (!sme_me_mask)
		return;

	local_flush_tlb();
	wbinvd();

	/*
	 * There are a limited number of early mapping slots, so map (at most)
	 * one page at a time.
	 */
	while (size) {
		len = min_t(size_t, sizeof(sme_early_buffer), size);

		/*
		 * Create mappings for the current and desired format of
		 * the memory. Use a write-protected mapping for the source.
		 */
		src = enc ? early_memremap_decrypted_wp(paddr, len) :
			    early_memremap_encrypted_wp(paddr, len);

		dst = enc ? early_memremap_encrypted(paddr, len) :
			    early_memremap_decrypted(paddr, len);

		/*
		 * If a mapping can't be obtained to perform the operation,
		 * then eventual access of that area in the desired mode
		 * will cause a crash.
		 */
		BUG_ON(!src || !dst);

		/*
		 * Use a temporary buffer, of cache-line multiple size, to
		 * avoid data corruption as documented in the APM.
		 */
		memcpy(sme_early_buffer, src, len);
		memcpy(dst, sme_early_buffer, len);

		early_memunmap(dst, len);
		early_memunmap(src, len);

		paddr += len;
		size -= len;
	}
}

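/*
 * Encrypt or decrypt a range of physical memory in place. Both wrappers
 * are no-ops when SME is not active.
 */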
void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, false);
}

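/*
 * Create or remove early PMD-level mappings of a range in the direct map
 * with the encryption mask cleared, so that the underlying data can be
 * accessed decrypted.
 */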
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
					     bool map)
{
	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
	pmdval_t pmd_flags, pmd;

	/* Use early_pmd_flags but remove the encryption mask */
	pmd_flags = __sme_clr(early_pmd_flags);

	do {
		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
		__early_make_pgtable((unsigned long)vaddr, pmd);

		vaddr += PMD_SIZE;
		paddr += PMD_SIZE;
		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
	} while (size);

	__native_flush_tlb();
}

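/*
 * Remove the decrypted mappings of the real-mode boot data and the kernel
 * command line that were established by sme_map_bootdata().
 */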
void __init sme_unmap_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!sme_active())
		return;

	/* Get the command line address before unmapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

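/*
 * Map the real-mode boot data and the kernel command line as decrypted so
 * that they can be accessed while SME is active.
 */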
void __init sme_map_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!sme_active())
		return;

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

	/* Get the command line address after mapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

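/*
 * Apply the memory encryption mask to the early pagetable flags and the
 * kernel protection map, and force SWIOTLB use when running as an SEV
 * guest.
 */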
void __init sme_early_init(void)
{
	unsigned int i;

	if (!sme_me_mask)
		return;

	early_pmd_flags = __sme_set(early_pmd_flags);

	__supported_pte_mask = __sme_set(__supported_pte_mask);

	/* Update the protection map with memory encryption mask */
	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
		protection_map[i] = pgprot_encrypted(protection_map[i]);

	if (sev_active())
		swiotlb_force = SWIOTLB_FORCE;
}

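/*
 * DMA coherent allocation for SEV guests: try a direct page allocation and
 * mark it decrypted, falling back to the SWIOTLB pool when the pages do
 * not fit the device's DMA mask.
 */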
static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
		       gfp_t gfp, unsigned long attrs)
{
	unsigned long dma_mask;
	unsigned int order;
	struct page *page;
	void *vaddr = NULL;

	dma_mask = dma_alloc_coherent_mask(dev, gfp);
	order = get_order(size);

	/*
	 * Memory will be memset to zero after marking decrypted, so don't
	 * bother clearing it before.
	 */
	gfp &= ~__GFP_ZERO;

	page = alloc_pages_node(dev_to_node(dev), gfp, order);
	if (page) {
		dma_addr_t addr;

		/*
		 * Since we will be clearing the encryption bit, check the
		 * mask with it already cleared.
		 */
		addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
		if ((addr + size) > dma_mask) {
			__free_pages(page, get_order(size));
		} else {
			vaddr = page_address(page);
			*dma_handle = addr;
		}
	}

	if (!vaddr)
		vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);

	if (!vaddr)
		return NULL;

	/* Clear the SME encryption bit for DMA use if not swiotlb area */
	if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
		set_memory_decrypted((unsigned long)vaddr, 1 << order);
		memset(vaddr, 0, PAGE_SIZE << order);
		*dma_handle = __sme_clr(*dma_handle);
	}

	return vaddr;
}

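/*
 * Free a DMA coherent allocation made by sev_alloc(), re-encrypting the
 * pages first when they did not come from the SWIOTLB pool.
 */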
static void sev_free(struct device *dev, size_t size, void *vaddr,
		     dma_addr_t dma_handle, unsigned long attrs)
{
	/* Set the SME encryption bit for re-use if not swiotlb area */
	if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
		set_memory_encrypted((unsigned long)vaddr,
				     1 << get_order(size));

	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

/*
 * SME and SEV are very similar but they are not the same, so there are
 * times that the kernel will need to distinguish between SME and SEV. The
 * sme_active() and sev_active() functions are used for this. When a
 * distinction isn't needed, the mem_encrypt_active() function can be used.
 *
 * The trampoline code is a good example of this requirement. Before
 * paging is activated, SME will access all memory as decrypted, but SEV
 * will access all memory as encrypted. So, when APs are being brought
 * up under SME the trampoline area cannot be encrypted, whereas under SEV
 * the trampoline area must be encrypted.
 */
bool sme_active(void)
{
	return sme_me_mask && !sev_enabled;
}
EXPORT_SYMBOL_GPL(sme_active);

bool sev_active(void)
{
	return sme_me_mask && sev_enabled;
}
EXPORT_SYMBOL_GPL(sev_active);

static const struct dma_map_ops sev_dma_ops = {
	.alloc			= sev_alloc,
	.free			= sev_free,
	.map_page		= swiotlb_map_page,
	.unmap_page		= swiotlb_unmap_page,
	.map_sg			= swiotlb_map_sg_attrs,
	.unmap_sg		= swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu	= swiotlb_sync_single_for_cpu,
	.sync_single_for_device	= swiotlb_sync_single_for_device,
	.sync_sg_for_cpu	= swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device	= swiotlb_sync_sg_for_device,
	.mapping_error		= swiotlb_dma_mapping_error,
};

/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
	if (!sme_me_mask)
		return;

	/* Call into SWIOTLB to update the SWIOTLB DMA buffers */
	swiotlb_update_mem_attributes();

	/*
	 * With SEV, DMA operations cannot use encryption. New DMA ops
	 * are required in order to mark the DMA areas as decrypted or
	 * to use bounce buffers.
	 */
	if (sev_active())
		dma_ops = &sev_dma_ops;

	/*
	 * With SEV, we need to unroll the rep string I/O instructions.
	 */
	if (sev_active())
		static_branch_enable(&sev_enable_key);

	pr_info("AMD %s active\n",
		sev_active() ? "Secure Encrypted Virtualization (SEV)"
			     : "Secure Memory Encryption (SME)");
}

void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
{
	WARN(PAGE_ALIGN(size) != size,
	     "size is not page-aligned (%#lx)\n", size);

	/* Make the SWIOTLB buffer area decrypted */
	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

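/* Clear the PGD entries that cover the given virtual address range */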
static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
				 unsigned long end)
{
	unsigned long pgd_start, pgd_end, pgd_size;
	pgd_t *pgd_p;

	pgd_start = start & PGDIR_MASK;
	pgd_end = end & PGDIR_MASK;

	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
	pgd_size *= sizeof(pgd_t);

	pgd_p = pgd_base + pgd_index(start);

	memset(pgd_p, 0, pgd_size);
}

#define PGD_FLAGS	_KERNPG_TABLE_NOENC
#define P4D_FLAGS	_KERNPG_TABLE_NOENC
#define PUD_FLAGS	_KERNPG_TABLE_NOENC
#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

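/*
 * Map a single 2MB (PMD) page at vaddr in the given pagetable, allocating
 * any intermediate P4D/PUD/PMD tables from pgtable_area. Returns the
 * updated pgtable_area pointer.
 */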
static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
				     unsigned long vaddr, pmdval_t pmd_val)
{
	pgd_t *pgd_p;
	p4d_t *p4d_p;
	pud_t *pud_p;
	pmd_t *pmd_p;

	pgd_p = pgd_base + pgd_index(vaddr);
	if (native_pgd_val(*pgd_p)) {
		if (IS_ENABLED(CONFIG_X86_5LEVEL))
			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
		else
			pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
	} else {
		pgd_t pgd;

		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
			p4d_p = pgtable_area;
			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;

			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
		} else {
			pud_p = pgtable_area;
			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
		}
		native_set_pgd(pgd_p, pgd);
	}

	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_p += p4d_index(vaddr);
		if (native_p4d_val(*p4d_p)) {
			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
		} else {
			p4d_t p4d;

			pud_p = pgtable_area;
			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
			native_set_p4d(p4d_p, p4d);
		}
	}

	pud_p += pud_index(vaddr);
	if (native_pud_val(*pud_p)) {
		if (native_pud_val(*pud_p) & _PAGE_PSE)
			goto out;

		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
	} else {
		pud_t pud;

		pmd_p = pgtable_area;
		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;

		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
		native_set_pud(pud_p, pud);
	}

	pmd_p += pmd_index(vaddr);
	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
		native_set_pmd(pmd_p, native_make_pmd(pmd_val));

out:
	return pgtable_area;
}

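/*
 * Return a conservative number of bytes of pagetable structures (P4Ds,
 * PUDs and PMDs) needed to map 'len' bytes with 2MB pages, including the
 * tables needed to map the new tables themselves.
 */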
static unsigned long __init sme_pgtable_calc(unsigned long len)
{
	unsigned long p4d_size, pud_size, pmd_size;
	unsigned long total;

	/*
	 * Perform a relatively simplistic calculation of the pagetable
	 * entries that are needed. The mappings will be covered by 2MB
	 * PMD entries so we can conservatively calculate the required
	 * number of P4D, PUD and PMD structures needed to perform the
	 * mappings. Incrementing the count for each covers the case where
	 * the addresses cross entries.
	 */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
		pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	} else {
		p4d_size = 0;
		pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	}
	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

	total = p4d_size + pud_size + pmd_size;

	/*
	 * Now calculate the added pagetable structures needed to populate
	 * the new pagetables.
	 */
	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
		p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
		pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	} else {
		p4d_size = 0;
		pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
	}
	pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

	total += p4d_size + pud_size + pmd_size;

	return total;
}

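/*
 * Encrypt the kernel image in place: build temporary encrypted and
 * decrypted (write-protected) mappings of the kernel plus a workarea,
 * hand off to sme_encrypt_execute() to perform the copy, then tear the
 * decrypted mappings back down.
 */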
void __init sme_encrypt_kernel(void)
{
	unsigned long workarea_start, workarea_end, workarea_len;
	unsigned long execute_start, execute_end, execute_len;
	unsigned long kernel_start, kernel_end, kernel_len;
	unsigned long pgtable_area_len;
	unsigned long paddr, pmd_flags;
	unsigned long decrypted_base;
	void *pgtable_area;
	pgd_t *pgd;

	if (!sme_active())
		return;

	/*
	 * Prepare for encrypting the kernel by building new pagetables with
	 * the necessary attributes needed to encrypt the kernel in place.
	 *
	 * One range of virtual addresses will map the memory occupied
	 * by the kernel as encrypted.
	 *
	 * Another range of virtual addresses will map the memory occupied
	 * by the kernel as decrypted and write-protected.
	 *
	 * The use of the write-protect attribute will prevent any of the
	 * memory from being cached.
	 */

	/* Physical addresses give us the identity mapped virtual addresses */
	kernel_start = __pa_symbol(_text);
	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
	kernel_len = kernel_end - kernel_start;

	/* Set the encryption workarea to be immediately after the kernel */
	workarea_start = kernel_end;

	/*
	 * Calculate the required number of workarea bytes:
	 *   executable encryption area size:
	 *     stack page (PAGE_SIZE)
	 *     encryption routine page (PAGE_SIZE)
	 *     intermediate copy buffer (PMD_PAGE_SIZE)
	 *   pagetable structures for the encryption of the kernel
	 *   pagetable structures for workarea (in case not currently mapped)
	 */
	execute_start = workarea_start;
	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
	execute_len = execute_end - execute_start;

	/*
	 * One PGD for both encrypted and decrypted mappings and a set of
	 * PUDs and PMDs for each of the encrypted and decrypted mappings.
	 */
	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;

	/* PUDs and PMDs needed in the current pagetables for the workarea */
	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

	/*
	 * The total workarea includes the executable encryption area and
	 * the pagetable area.
	 */
	workarea_len = execute_len + pgtable_area_len;
	workarea_end = workarea_start + workarea_len;

	/*
	 * Set the address to the start of where newly created pagetable
	 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
	 * structures are created when the workarea is added to the current
	 * pagetables and when the new encrypted and decrypted kernel
	 * mappings are populated.
	 */
	pgtable_area = (void *)execute_end;

	/*
	 * Make sure the current pagetable structure has entries for
	 * addressing the workarea.
	 */
	pgd = (pgd_t *)native_read_cr3_pa();
	paddr = workarea_start;
	while (paddr < workarea_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + PMD_FLAGS);

		paddr += PMD_PAGE_SIZE;
	}

	/* Flush the TLB - no globals so cr3 is enough */
	native_write_cr3(__native_read_cr3());

	/*
	 * A new pagetable structure is being built to allow for the kernel
	 * to be encrypted. It starts with an empty PGD that will then be
	 * populated with new PUDs and PMDs as the encrypted and decrypted
	 * kernel mappings are created.
	 */
	pgd = pgtable_area;
	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

	/* Add encrypted kernel (identity) mappings */
	pmd_flags = PMD_FLAGS | _PAGE_ENC;
	paddr = kernel_start;
	while (paddr < kernel_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + pmd_flags);

		paddr += PMD_PAGE_SIZE;
	}

	/*
	 * A different PGD index/entry must be used to get different
	 * pagetable entries for the decrypted mapping. Choose the next
	 * PGD index and convert it to a virtual address to be used as
	 * the base of the mapping.
	 */
	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
	decrypted_base <<= PGDIR_SHIFT;

	/* Add decrypted, write-protected kernel (non-identity) mappings */
	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
	paddr = kernel_start;
	while (paddr < kernel_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr + decrypted_base,
						paddr + pmd_flags);

		paddr += PMD_PAGE_SIZE;
	}

	/* Add decrypted workarea mappings to both kernel mappings */
	paddr = workarea_start;
	while (paddr < workarea_end) {
		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr,
						paddr + PMD_FLAGS);

		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
						paddr + decrypted_base,
						paddr + PMD_FLAGS);

		paddr += PMD_PAGE_SIZE;
	}

	/* Perform the encryption */
	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
			    kernel_len, workarea_start, (unsigned long)pgd);

	/*
	 * At this point we are running encrypted. Remove the mappings for
	 * the decrypted areas - all that is needed for this is to remove
	 * the PGD entry/entries.
	 */
	sme_clear_pgd(pgd, kernel_start + decrypted_base,
		      kernel_end + decrypted_base);

	sme_clear_pgd(pgd, workarea_start + decrypted_base,
		      workarea_end + decrypted_base);

	/* Flush the TLB - no globals so cr3 is enough */
	native_write_cr3(__native_read_cr3());
}

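/*
 * Detect SME/SEV support and set sme_me_mask. Called while still running
 * identity mapped, so the command-line strings must be accessed
 * rip-relative. SME can be controlled via the mem_encrypt= parameter;
 * SEV state comes from the hypervisor and cannot be overridden.
 */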
void __init __nostackprotector sme_enable(struct boot_params *bp)
{
	const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
	unsigned int eax, ebx, ecx, edx;
	unsigned long feature_mask;
	bool active_by_default;
	unsigned long me_mask;
	char buffer[16];
	u64 msr;

	/* Check for the SME/SEV support leaf */
	eax = 0x80000000;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);
	if (eax < 0x8000001f)
		return;

#define AMD_SME_BIT	BIT(0)
#define AMD_SEV_BIT	BIT(1)
	/*
	 * Set the feature mask (SME or SEV) based on whether we are
	 * running under a hypervisor.
	 */
	eax = 1;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);
	feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;

	/*
	 * Check for the SME/SEV feature:
	 *   CPUID Fn8000_001F[EAX]
	 *   - Bit 0 - Secure Memory Encryption support
	 *   - Bit 1 - Secure Encrypted Virtualization support
	 *   CPUID Fn8000_001F[EBX]
	 *   - Bits 5:0 - Pagetable bit position used to indicate encryption
	 */
	eax = 0x8000001f;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);
	if (!(eax & feature_mask))
		return;

	me_mask = 1UL << (ebx & 0x3f);

	/* Check if memory encryption is enabled */
	if (feature_mask == AMD_SME_BIT) {
		/* For SME, check the SYSCFG MSR */
		msr = __rdmsr(MSR_K8_SYSCFG);
		if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
			return;
	} else {
		/* For SEV, check the SEV MSR */
		msr = __rdmsr(MSR_AMD64_SEV);
		if (!(msr & MSR_AMD64_SEV_ENABLED))
			return;

		/* SEV state cannot be controlled by a command line option */
		sme_me_mask = me_mask;
		sev_enabled = true;
		return;
	}

	/*
	 * Fixups have not been applied to phys_base yet and we're running
	 * identity mapped, so we must obtain the address to the SME command
	 * line argument data using rip-relative addressing.
	 */
	asm ("lea sme_cmdline_arg(%%rip), %0"
	     : "=r" (cmdline_arg)
	     : "p" (sme_cmdline_arg));
	asm ("lea sme_cmdline_on(%%rip), %0"
	     : "=r" (cmdline_on)
	     : "p" (sme_cmdline_on));
	asm ("lea sme_cmdline_off(%%rip), %0"
	     : "=r" (cmdline_off)
	     : "p" (sme_cmdline_off));

	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
		active_by_default = true;
	else
		active_by_default = false;

	cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
				     ((u64)bp->ext_cmd_line_ptr << 32));

	cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));

	if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
		sme_me_mask = me_mask;
	else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
		sme_me_mask = 0;
	else
		sme_me_mask = active_by_default ? me_mask : 0;
}