/*
 * kexec: kexec_file_load system call
 *
 * Copyright (C) 2014 Red Hat Inc.
 * Authors:
 *      Vivek Goyal <vgoyal@redhat.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/fs.h>
#include <linux/ima.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include "kexec_internal.h"

/*
 * Declare these symbols weak so that if an architecture provides a
 * purgatory, they will be overridden.
 */
char __weak kexec_purgatory[0];
size_t __weak kexec_purgatory_size = 0;

static int kexec_calculate_store_digests(struct kimage *image);

/* Architectures can provide this probe function */
int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
                                         unsigned long buf_len)
{
        return -ENOEXEC;
}
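
/*
 * An architecture override of the probe hook above typically walks a
 * per-arch table of image loaders and picks the first one whose probe
 * accepts the buffer. A minimal sketch, modeled loosely on the x86
 * implementation (the loader table name is illustrative):
 *
 *      int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 *                                        unsigned long buf_len)
 *      {
 *              int i, ret = -ENOEXEC;
 *
 *              for (i = 0; i < ARRAY_SIZE(arch_image_loaders); i++) {
 *                      ret = arch_image_loaders[i]->probe(buf, buf_len);
 *                      if (!ret) {
 *                              image->fops = arch_image_loaders[i];
 *                              return 0;
 *                      }
 *              }
 *              return ret;
 *      }
 */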

void * __weak arch_kexec_kernel_image_load(struct kimage *image)
{
        return ERR_PTR(-ENOEXEC);
}

int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
{
        return -EINVAL;
}

#ifdef CONFIG_KEXEC_VERIFY_SIG
int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
                                        unsigned long buf_len)
{
        return -EKEYREJECTED;
}
#endif

/* Apply relocations of type RELA */
int __weak
arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                                 unsigned int relsec)
{
        pr_err("RELA relocation unsupported.\n");
        return -ENOEXEC;
}

/* Apply relocations of type REL */
int __weak
arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                             unsigned int relsec)
{
        pr_err("REL relocation unsupported.\n");
        return -ENOEXEC;
}

/*
 * Free up memory used by the kernel, initrd, and command line. These are
 * temporary allocations which are no longer needed after the buffers have
 * been loaded into separate segments and copied elsewhere.
 */
void kimage_file_post_load_cleanup(struct kimage *image)
{
        struct purgatory_info *pi = &image->purgatory_info;

        vfree(image->kernel_buf);
        image->kernel_buf = NULL;

        vfree(image->initrd_buf);
        image->initrd_buf = NULL;

        kfree(image->cmdline_buf);
        image->cmdline_buf = NULL;

        vfree(pi->purgatory_buf);
        pi->purgatory_buf = NULL;

        vfree(pi->sechdrs);
        pi->sechdrs = NULL;

        /* See if the architecture has anything to clean up post load */
        arch_kimage_file_post_load_cleanup(image);

        /*
         * The above call should have called into the bootloader to free up
         * any data stored in kimage->image_loader_data. It should be OK
         * now to free it.
         */
        kfree(image->image_loader_data);
        image->image_loader_data = NULL;
}

/*
 * In file mode, the list of segments is prepared by the kernel. Copy the
 * relevant data from user space, do error checking, and prepare the
 * segment list.
 */
static int
kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
                             const char __user *cmdline_ptr,
                             unsigned long cmdline_len, unsigned flags)
{
        int ret = 0;
        void *ldata;
        loff_t size;

        ret = kernel_read_file_from_fd(kernel_fd, &image->kernel_buf,
                                       &size, INT_MAX, READING_KEXEC_IMAGE);
        if (ret)
                return ret;
        image->kernel_buf_len = size;

        /* IMA needs to pass the measurement list to the next kernel. */
        ima_add_kexec_buffer(image);

        /* Call arch image probe handlers */
        ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
                                            image->kernel_buf_len);
        if (ret)
                goto out;

#ifdef CONFIG_KEXEC_VERIFY_SIG
        ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
                                           image->kernel_buf_len);
        if (ret) {
                pr_debug("kernel signature verification failed.\n");
                goto out;
        }
        pr_debug("kernel signature verification successful.\n");
#endif
        /* It is possible that no initramfs is being loaded */
        if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
                ret = kernel_read_file_from_fd(initrd_fd, &image->initrd_buf,
                                               &size, INT_MAX,
                                               READING_KEXEC_INITRAMFS);
                if (ret)
                        goto out;
                image->initrd_buf_len = size;
        }

        if (cmdline_len) {
                image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len);
                if (IS_ERR(image->cmdline_buf)) {
                        ret = PTR_ERR(image->cmdline_buf);
                        image->cmdline_buf = NULL;
                        goto out;
                }

                image->cmdline_buf_len = cmdline_len;

                /* The command line should be a NUL-terminated string */
                if (image->cmdline_buf[cmdline_len - 1] != '\0') {
                        ret = -EINVAL;
                        goto out;
                }
        }

        /* Call arch image load handlers */
        ldata = arch_kexec_kernel_image_load(image);

        if (IS_ERR(ldata)) {
                ret = PTR_ERR(ldata);
                goto out;
        }

        image->image_loader_data = ldata;
out:
        /* In case of error, free up all allocated memory in this function */
        if (ret)
                kimage_file_post_load_cleanup(image);
        return ret;
}

static int
kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
                       int initrd_fd, const char __user *cmdline_ptr,
                       unsigned long cmdline_len, unsigned long flags)
{
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;

        image = do_kimage_alloc_init();
        if (!image)
                return -ENOMEM;

        image->file_mode = 1;

        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }

        ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
                                           cmdline_ptr, cmdline_len, flags);
        if (ret)
                goto out_free_image;

        ret = sanity_check_segment_list(image);
        if (ret)
                goto out_free_post_load_bufs;

        ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
                goto out_free_post_load_bufs;
        }

        if (!kexec_on_panic) {
                image->swap_page = kimage_alloc_control_pages(image, 0);
                if (!image->swap_page) {
                        pr_err("Could not allocate swap buffer\n");
                        goto out_free_control_pages;
                }
        }

        *rimage = image;
        return 0;
out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
out_free_post_load_bufs:
        kimage_file_post_load_cleanup(image);
out_free_image:
        kfree(image);
        return ret;
}

SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
                unsigned long, cmdline_len, const char __user *, cmdline_ptr,
                unsigned long, flags)
{
        int ret = 0, i;
        struct kimage **dest_image, *image;

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;

        /* Make sure we have a legal set of flags */
        if (flags != (flags & KEXEC_FILE_FLAGS))
                return -EINVAL;

        image = NULL;

        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        dest_image = &kexec_image;
        if (flags & KEXEC_FILE_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
        }

        if (flags & KEXEC_FILE_UNLOAD)
                goto exchange;

        /*
         * In case of crash, the new kernel gets loaded in a reserved region.
         * It is the same memory where the old crash kernel might already be
         * loaded. Free any current crash dump kernel before we corrupt it.
         */
        if (flags & KEXEC_FILE_ON_CRASH)
                kimage_free(xchg(&kexec_crash_image, NULL));

        ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
                                     cmdline_len, flags);
        if (ret)
                goto out;

        ret = machine_kexec_prepare(image);
        if (ret)
                goto out;

        /*
         * Some architectures (like s390) may touch the crash memory before
         * machine_kexec_prepare(), so we must copy the vmcoreinfo data after
         * it.
         */
        ret = kimage_crash_copy_vmcoreinfo(image);
        if (ret)
                goto out;

        ret = kexec_calculate_store_digests(image);
        if (ret)
                goto out;

        for (i = 0; i < image->nr_segments; i++) {
                struct kexec_segment *ksegment;

                ksegment = &image->segment[i];
                pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
                         i, ksegment->buf, ksegment->bufsz, ksegment->mem,
                         ksegment->memsz);

                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
                        goto out;
        }

        kimage_terminate(image);

        /*
         * Free up any temporary buffers allocated which are not needed
         * after the image has been loaded.
         */
        kimage_file_post_load_cleanup(image);
exchange:
        image = xchg(dest_image, image);
out:
        if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();

        mutex_unlock(&kexec_mutex);
        kimage_free(image);
        return ret;
}
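
/*
 * Illustrative userspace invocation (a sketch, not part of this file;
 * glibc provides no wrapper, so the raw syscall is used, and the file
 * paths are made up):
 *
 *      #include <unistd.h>
 *      #include <sys/syscall.h>
 *      #include <fcntl.h>
 *      #include <linux/kexec.h>
 *
 *      int kernel_fd = open("/boot/vmlinuz", O_RDONLY);
 *      int initrd_fd = open("/boot/initrd.img", O_RDONLY);
 *      const char cmdline[] = "root=/dev/sda1 console=ttyS0";
 *
 *      long ret = syscall(SYS_kexec_file_load, kernel_fd, initrd_fd,
 *                         sizeof(cmdline), cmdline, 0UL);
 *
 * Note that cmdline_len counts the trailing NUL byte, which
 * kimage_file_prepare_segments() checks for.
 */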

static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
                                    struct kexec_buf *kbuf)
{
        struct kimage *image = kbuf->image;
        unsigned long temp_start, temp_end;

        temp_end = min(end, kbuf->buf_max);
        temp_start = temp_end - kbuf->memsz;

        do {
                /* align down start */
                temp_start = temp_start & (~(kbuf->buf_align - 1));

                if (temp_start < start || temp_start < kbuf->buf_min)
                        return 0;

                temp_end = temp_start + kbuf->memsz - 1;

                /*
                 * Make sure this does not conflict with any existing
                 * segments.
                 */
                if (kimage_is_destination_range(image, temp_start, temp_end)) {
                        temp_start = temp_start - PAGE_SIZE;
                        continue;
                }

                /* We found a suitable memory range */
                break;
        } while (1);

        /* If we are here, we found a suitable memory range */
        kbuf->mem = temp_start;

        /* Success, stop navigating through the remaining System RAM ranges */
        return 1;
}

static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
                                     struct kexec_buf *kbuf)
{
        struct kimage *image = kbuf->image;
        unsigned long temp_start, temp_end;

        temp_start = max(start, kbuf->buf_min);

        do {
                temp_start = ALIGN(temp_start, kbuf->buf_align);
                temp_end = temp_start + kbuf->memsz - 1;

                if (temp_end > end || temp_end > kbuf->buf_max)
                        return 0;
                /*
                 * Make sure this does not conflict with any existing
                 * segments.
                 */
                if (kimage_is_destination_range(image, temp_start, temp_end)) {
                        temp_start = temp_start + PAGE_SIZE;
                        continue;
                }

                /* We found a suitable memory range */
                break;
        } while (1);

        /* If we are here, we found a suitable memory range */
        kbuf->mem = temp_start;

        /* Success, stop navigating through the remaining System RAM ranges */
        return 1;
}

static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
{
        struct kexec_buf *kbuf = (struct kexec_buf *)arg;
        unsigned long sz = end - start + 1;

        /* Returning 0 will take us to the next memory range */
        if (sz < kbuf->memsz)
                return 0;

        if (end < kbuf->buf_min || start > kbuf->buf_max)
                return 0;

        /*
         * Allocate memory top down within the RAM range. Otherwise, do a
         * bottom up allocation.
         */
        if (kbuf->top_down)
                return locate_mem_hole_top_down(start, end, kbuf);
        return locate_mem_hole_bottom_up(start, end, kbuf);
}
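
/*
 * Worked example (illustrative numbers): with an inclusive range end of
 * 0x3ffffff, memsz = 0x3000 and buf_align = 0x1000, the top-down search
 * starts at 0x3ffffff - 0x3000 = 0x3ffcfff, aligned down to 0x3ffc000,
 * i.e. the candidate window [0x3ffc000, 0x3ffefff]. If that window
 * overlaps an existing segment, it slides down one page and retries,
 * until either a hole is found (return 1, which stops the walk) or it
 * falls below start/buf_min (return 0, so the walk moves on to the next
 * System RAM range).
 */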

/**
 * arch_kexec_walk_mem - call func(data) on free memory regions
 * @kbuf:       Context info for the search. Also passed to @func.
 * @func:       Function to call for each memory region.
 *
 * Return: The memory walk will stop when func returns a non-zero value
 * and that value will be returned. If all free regions are visited without
 * func returning non-zero, then zero will be returned.
 */
int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
                               int (*func)(u64, u64, void *))
{
        if (kbuf->image->type == KEXEC_TYPE_CRASH)
                return walk_iomem_res_desc(crashk_res.desc,
                                           IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
                                           crashk_res.start, crashk_res.end,
                                           kbuf, func);
        else
                return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
}

/**
 * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel
 * @kbuf:       Parameters for the memory search.
 *
 * On success, kbuf->mem will have the start address of the memory region found.
 *
 * Return: 0 on success, negative errno on error.
 */
int kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
        int ret;

        ret = arch_kexec_walk_mem(kbuf, locate_mem_hole_callback);

        return ret == 1 ? 0 : -EADDRNOTAVAIL;
}

/**
 * kexec_add_buffer - place a buffer in a kexec segment
 * @kbuf:       Buffer contents and memory parameters.
 *
 * This function assumes that kexec_mutex is held.
 * On successful return, @kbuf->mem will have the physical address of
 * the buffer in memory.
 *
 * Return: 0 on success, negative errno on error.
 */
int kexec_add_buffer(struct kexec_buf *kbuf)
{
        struct kexec_segment *ksegment;
        int ret;

        /* Currently, adding a segment this way is allowed only in file mode */
        if (!kbuf->image->file_mode)
                return -EINVAL;

        if (kbuf->image->nr_segments >= KEXEC_SEGMENT_MAX)
                return -EINVAL;

        /*
         * Make sure we are not trying to add a buffer after allocating
         * control pages. All segments need to be placed before any
         * control pages are allocated, as the control page allocation
         * logic goes through the list of segments to make sure there are
         * no destination overlaps.
         */
        if (!list_empty(&kbuf->image->control_pages)) {
                WARN_ON(1);
                return -EINVAL;
        }

        /* Ensure minimum alignment needed for segments. */
        kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
        kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);

        /* Walk the RAM ranges and allocate a suitable range for the buffer */
        ret = kexec_locate_mem_hole(kbuf);
        if (ret)
                return ret;

        /* Found a suitable memory range */
        ksegment = &kbuf->image->segment[kbuf->image->nr_segments];
        ksegment->kbuf = kbuf->buffer;
        ksegment->bufsz = kbuf->bufsz;
        ksegment->mem = kbuf->mem;
        ksegment->memsz = kbuf->memsz;
        kbuf->image->nr_segments++;
        return 0;
}
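
/*
 * Typical caller sketch (illustrative; it mirrors how arch loaders such
 * as the powerpc ELF loader use this API, with made-up buffer names):
 *
 *      struct kexec_buf kbuf = { .image = image, .buf_min = 0,
 *                                .buf_max = ULONG_MAX, .top_down = false };
 *
 *      kbuf.buffer = initrd_buf;
 *      kbuf.bufsz = kbuf.memsz = initrd_len;
 *      kbuf.buf_align = PAGE_SIZE;
 *      ret = kexec_add_buffer(&kbuf);
 *      if (ret)
 *              goto out;
 *      initrd_load_addr = kbuf.mem;
 */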

/* Calculate and store the digest of segments */
static int kexec_calculate_store_digests(struct kimage *image)
{
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        int ret = 0, i, j, zero_buf_sz, sha_region_sz;
        size_t desc_size, nullsz;
        char *digest;
        void *zero_buf;
        struct kexec_sha_region *sha_regions;
        struct purgatory_info *pi = &image->purgatory_info;

        zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
        zero_buf_sz = PAGE_SIZE;

        tfm = crypto_alloc_shash("sha256", 0, 0);
        if (IS_ERR(tfm)) {
                ret = PTR_ERR(tfm);
                goto out;
        }

        desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
        desc = kzalloc(desc_size, GFP_KERNEL);
        if (!desc) {
                ret = -ENOMEM;
                goto out_free_tfm;
        }

        sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
        sha_regions = vzalloc(sha_region_sz);
        if (!sha_regions) {
                ret = -ENOMEM;
                goto out_free_desc;
        }

        desc->tfm = tfm;
        desc->flags = 0;

        ret = crypto_shash_init(desc);
        if (ret < 0)
                goto out_free_sha_regions;

        digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
        if (!digest) {
                ret = -ENOMEM;
                goto out_free_sha_regions;
        }

        for (j = i = 0; i < image->nr_segments; i++) {
                struct kexec_segment *ksegment;

                ksegment = &image->segment[i];
                /*
                 * Skip purgatory as it will be modified once we put the
                 * digest info in purgatory.
                 */
                if (ksegment->kbuf == pi->purgatory_buf)
                        continue;

                ret = crypto_shash_update(desc, ksegment->kbuf,
                                          ksegment->bufsz);
                if (ret)
                        break;

                /*
                 * Assume the rest of the buffer is filled with zeroes and
                 * update the digest accordingly.
                 */
                nullsz = ksegment->memsz - ksegment->bufsz;
                while (nullsz) {
                        unsigned long bytes = nullsz;

                        if (bytes > zero_buf_sz)
                                bytes = zero_buf_sz;
                        ret = crypto_shash_update(desc, zero_buf, bytes);
                        if (ret)
                                break;
                        nullsz -= bytes;
                }

                if (ret)
                        break;

                sha_regions[j].start = ksegment->mem;
                sha_regions[j].len = ksegment->memsz;
                j++;
        }

        if (!ret) {
                ret = crypto_shash_final(desc, digest);
                if (ret)
                        goto out_free_digest;
                ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
                                                     sha_regions, sha_region_sz, 0);
                if (ret)
                        goto out_free_digest;

                ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
                                                     digest, SHA256_DIGEST_SIZE, 0);
                if (ret)
                        goto out_free_digest;
        }

out_free_digest:
        kfree(digest);
out_free_sha_regions:
        vfree(sha_regions);
out_free_desc:
        kfree(desc);
out_free_tfm:
        kfree(tfm);
out:
        return ret;
}
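
/*
 * At boot-into-purgatory time, the purgatory blob is expected to recompute
 * SHA-256 over each (start, len) pair stored in purgatory_sha_regions and
 * compare the result with purgatory_sha256_digest, returning non-zero (and
 * thus refusing to boot) on mismatch. A minimal sketch of that check; the
 * shape follows the x86 purgatory's verify_sha256_digest():
 *
 *      struct kexec_sha_region *reg;
 *      u8 digest[SHA256_DIGEST_SIZE];
 *      struct sha256_state ss;
 *
 *      sha256_init(&ss);
 *      for (reg = purgatory_sha_regions;
 *           reg < &purgatory_sha_regions[ARRAY_SIZE(purgatory_sha_regions)];
 *           reg++)
 *              sha256_update(&ss, (u8 *)reg->start, reg->len);
 *      sha256_final(&ss, digest);
 *      if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
 *              return 1;
 */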

/* Actually load the purgatory. A lot of code is taken from kexec-tools. */
static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
                                  unsigned long max, int top_down)
{
        struct purgatory_info *pi = &image->purgatory_info;
        unsigned long align, bss_align, bss_sz, bss_pad;
        unsigned long entry, load_addr, curr_load_addr, bss_addr, offset;
        unsigned char *buf_addr, *src;
        int i, ret = 0, entry_sidx = -1;
        const Elf_Shdr *sechdrs_c;
        Elf_Shdr *sechdrs = NULL;
        struct kexec_buf kbuf = { .image = image, .bufsz = 0, .buf_align = 1,
                                  .buf_min = min, .buf_max = max,
                                  .top_down = top_down };

        /*
         * sechdrs_c points to the section headers in purgatory, which are
         * read-only. No modifications allowed.
         */
        sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;

        /*
         * We cannot modify sechdrs_c[] and its fields. It is read-only.
         * Copy it over to a local copy where one can store some temporary
         * data and free it at the end. We need to modify the ->sh_addr and
         * ->sh_offset fields to keep track of the permanent and temporary
         * locations of sections.
         */
        sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
        if (!sechdrs)
                return -ENOMEM;

        memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));

        /*
         * We end up with multiple copies of the sections. The first copy is
         * the one embedded in the kernel in a read-only section. Some of
         * these sections will be copied to a temporary buffer and relocated.
         * And these sections will finally be copied to their final
         * destination at segment load time.
         *
         * Use ->sh_offset to reflect the section address in memory. It will
         * point to the original read-only copy if the section is not
         * allocatable. Otherwise it will point to the temporary copy which
         * will be relocated.
         *
         * Use ->sh_addr to contain the final address of the section, where
         * it will go during execution time.
         */
        for (i = 0; i < pi->ehdr->e_shnum; i++) {
                if (sechdrs[i].sh_type == SHT_NOBITS)
                        continue;

                sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
                                       sechdrs[i].sh_offset;
        }

        /*
         * Identify the entry point section and make the entry relative to
         * the section start.
         */
        entry = pi->ehdr->e_entry;
        for (i = 0; i < pi->ehdr->e_shnum; i++) {
                if (!(sechdrs[i].sh_flags & SHF_ALLOC))
                        continue;

                if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
                        continue;

                /* Make entry section relative */
                if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
                    ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
                     pi->ehdr->e_entry)) {
                        entry_sidx = i;
                        entry -= sechdrs[i].sh_addr;
                        break;
                }
        }

        /* Determine how much memory is needed to load the relocatable object. */
        bss_align = 1;
        bss_sz = 0;

        for (i = 0; i < pi->ehdr->e_shnum; i++) {
                if (!(sechdrs[i].sh_flags & SHF_ALLOC))
                        continue;

                align = sechdrs[i].sh_addralign;
                if (sechdrs[i].sh_type != SHT_NOBITS) {
                        if (kbuf.buf_align < align)
                                kbuf.buf_align = align;
                        kbuf.bufsz = ALIGN(kbuf.bufsz, align);
                        kbuf.bufsz += sechdrs[i].sh_size;
                } else {
                        /* bss section */
                        if (bss_align < align)
                                bss_align = align;
                        bss_sz = ALIGN(bss_sz, align);
                        bss_sz += sechdrs[i].sh_size;
                }
        }

        /* Determine the bss padding required to align bss properly */
        bss_pad = 0;
        if (kbuf.bufsz & (bss_align - 1))
                bss_pad = bss_align - (kbuf.bufsz & (bss_align - 1));

        kbuf.memsz = kbuf.bufsz + bss_pad + bss_sz;

        /* Allocate buffer for purgatory */
        kbuf.buffer = vzalloc(kbuf.bufsz);
        if (!kbuf.buffer) {
                ret = -ENOMEM;
                goto out;
        }

        if (kbuf.buf_align < bss_align)
                kbuf.buf_align = bss_align;

        /* Add buffer to segment list */
        ret = kexec_add_buffer(&kbuf);
        if (ret)
                goto out;
        pi->purgatory_load_addr = kbuf.mem;

        /* Load SHF_ALLOC sections */
        buf_addr = kbuf.buffer;
        load_addr = curr_load_addr = pi->purgatory_load_addr;
        bss_addr = load_addr + kbuf.bufsz + bss_pad;

        for (i = 0; i < pi->ehdr->e_shnum; i++) {
                if (!(sechdrs[i].sh_flags & SHF_ALLOC))
                        continue;

                align = sechdrs[i].sh_addralign;
                if (sechdrs[i].sh_type != SHT_NOBITS) {
                        curr_load_addr = ALIGN(curr_load_addr, align);
                        offset = curr_load_addr - load_addr;
                        /* We already modified ->sh_offset to keep the src addr */
                        src = (char *) sechdrs[i].sh_offset;
                        memcpy(buf_addr + offset, src, sechdrs[i].sh_size);

                        /* Store the load address of the section */
                        sechdrs[i].sh_addr = curr_load_addr;

                        /*
                         * This section got copied to the temporary buffer.
                         * Update ->sh_offset accordingly.
                         */
                        sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);

                        /* Advance to the next address */
                        curr_load_addr += sechdrs[i].sh_size;
                } else {
                        bss_addr = ALIGN(bss_addr, align);
                        sechdrs[i].sh_addr = bss_addr;
                        bss_addr += sechdrs[i].sh_size;
                }
        }

        /* Update the entry point based on the load address of the text section */
        if (entry_sidx >= 0)
                entry += sechdrs[entry_sidx].sh_addr;

        /* Make the kernel jump to purgatory after shutdown */
        image->start = entry;

        /* Used later to get/set symbol values */
        pi->sechdrs = sechdrs;

        /*
         * Used later to identify which section is purgatory and skip it
         * from checksumming.
         */
        pi->purgatory_buf = kbuf.buffer;
        return ret;
out:
        vfree(sechdrs);
        vfree(kbuf.buffer);
        return ret;
}

static int kexec_apply_relocations(struct kimage *image)
{
        int i, ret;
        struct purgatory_info *pi = &image->purgatory_info;
        Elf_Shdr *sechdrs = pi->sechdrs;

        /* Apply relocations */
        for (i = 0; i < pi->ehdr->e_shnum; i++) {
                Elf_Shdr *section, *symtab;

                if (sechdrs[i].sh_type != SHT_RELA &&
                    sechdrs[i].sh_type != SHT_REL)
                        continue;

                /*
                 * For sections of type SHT_RELA/SHT_REL, ->sh_link
                 * contains the section header index of the associated
                 * symbol table. And ->sh_info contains the section header
                 * index of the section to which the relocations apply.
                 */
                if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
                    sechdrs[i].sh_link >= pi->ehdr->e_shnum)
                        return -ENOEXEC;

                section = &sechdrs[sechdrs[i].sh_info];
                symtab = &sechdrs[sechdrs[i].sh_link];

                if (!(section->sh_flags & SHF_ALLOC))
                        continue;

                /*
                 * symtab->sh_link contains the section header index of the
                 * associated string table.
                 */
                if (symtab->sh_link >= pi->ehdr->e_shnum)
                        /* Invalid section number? */
                        continue;

                /*
                 * The respective architecture needs to provide support for
                 * applying relocations of type SHT_RELA/SHT_REL.
                 */
                if (sechdrs[i].sh_type == SHT_RELA)
                        ret = arch_kexec_apply_relocations_add(pi->ehdr,
                                                               sechdrs, i);
                else if (sechdrs[i].sh_type == SHT_REL)
                        ret = arch_kexec_apply_relocations(pi->ehdr,
                                                           sechdrs, i);
                if (ret)
                        return ret;
        }

        return 0;
}
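
/*
 * An arch implementation of arch_kexec_apply_relocations_add() decodes
 * each Elf_Rela entry and patches the target section in the temporary
 * buffer, using ->sh_offset for the write location and ->sh_addr for the
 * run-time value. A heavily trimmed sketch, loosely modeled on the x86_64
 * version (only one relocation type shown; symbol lookup elided):
 *
 *      Elf64_Rela *relas = (void *)sechdrs[relsec].sh_offset;
 *
 *      for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*relas); i++) {
 *              location = (void *)(section->sh_offset + relas[i].r_offset);
 *              address = section->sh_addr + relas[i].r_offset;
 *              sym = ...symtab entry for ELF64_R_SYM(relas[i].r_info)...;
 *              value = ...resolved symbol value... + relas[i].r_addend;
 *              switch (ELF64_R_TYPE(relas[i].r_info)) {
 *              case R_X86_64_64:
 *                      *(u64 *)location = value;
 *                      break;
 *              ...
 *              }
 *      }
 */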

/* Load relocatable purgatory object and relocate it appropriately */
int kexec_load_purgatory(struct kimage *image, unsigned long min,
                         unsigned long max, int top_down,
                         unsigned long *load_addr)
{
        struct purgatory_info *pi = &image->purgatory_info;
        int ret;

        if (kexec_purgatory_size <= 0)
                return -EINVAL;

        if (kexec_purgatory_size < sizeof(Elf_Ehdr))
                return -ENOEXEC;

        pi->ehdr = (Elf_Ehdr *)kexec_purgatory;

        if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
            || pi->ehdr->e_type != ET_REL
            || !elf_check_arch(pi->ehdr)
            || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
                return -ENOEXEC;

        if (pi->ehdr->e_shoff >= kexec_purgatory_size
            || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
                kexec_purgatory_size - pi->ehdr->e_shoff))
                return -ENOEXEC;

        ret = __kexec_load_purgatory(image, min, max, top_down);
        if (ret)
                return ret;

        ret = kexec_apply_relocations(image);
        if (ret)
                goto out;

        *load_addr = pi->purgatory_load_addr;
        return 0;
out:
        vfree(pi->sechdrs);
        pi->sechdrs = NULL;

        vfree(pi->purgatory_buf);
        pi->purgatory_buf = NULL;
        return ret;
}
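
/*
 * Typical arch caller sketch (illustrative; the shape follows the x86
 * bzImage loader, and the lower address bound is an assumption):
 *
 *      unsigned long purgatory_load_addr;
 *
 *      ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR,
 *                                 ULONG_MAX, 1, &purgatory_load_addr);
 *      if (ret)
 *              return ERR_PTR(ret);
 *      pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
 */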

static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
                                            const char *name)
{
        Elf_Sym *syms;
        Elf_Shdr *sechdrs;
        Elf_Ehdr *ehdr;
        int i, k;
        const char *strtab;

        if (!pi->sechdrs || !pi->ehdr)
                return NULL;

        sechdrs = pi->sechdrs;
        ehdr = pi->ehdr;

        for (i = 0; i < ehdr->e_shnum; i++) {
                if (sechdrs[i].sh_type != SHT_SYMTAB)
                        continue;

                if (sechdrs[i].sh_link >= ehdr->e_shnum)
                        /* Invalid strtab section number */
                        continue;
                strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
                syms = (Elf_Sym *)sechdrs[i].sh_offset;

                /* Go through symbols for a match */
                for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
                        if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
                                continue;

                        if (strcmp(strtab + syms[k].st_name, name) != 0)
                                continue;

                        if (syms[k].st_shndx == SHN_UNDEF ||
                            syms[k].st_shndx >= ehdr->e_shnum) {
                                pr_debug("Symbol: %s has bad section index %d.\n",
                                         name, syms[k].st_shndx);
                                return NULL;
                        }

                        /* Found the symbol we are looking for */
                        return &syms[k];
                }
        }

        return NULL;
}

void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
{
        struct purgatory_info *pi = &image->purgatory_info;
        Elf_Sym *sym;
        Elf_Shdr *sechdr;

        sym = kexec_purgatory_find_symbol(pi, name);
        if (!sym)
                return ERR_PTR(-EINVAL);

        sechdr = &pi->sechdrs[sym->st_shndx];

        /*
         * Returns the address where the symbol will finally be loaded after
         * kexec_load_segment().
         */
        return (void *)(sechdr->sh_addr + sym->st_value);
}

/*
 * Get or set the value of a symbol. If "get_value" is true, the symbol
 * value is returned in buf; otherwise the symbol value is set based on
 * the value in buf.
 */
int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
                                   void *buf, unsigned int size, bool get_value)
{
        Elf_Sym *sym;
        Elf_Shdr *sechdrs;
        struct purgatory_info *pi = &image->purgatory_info;
        char *sym_buf;

        sym = kexec_purgatory_find_symbol(pi, name);
        if (!sym)
                return -EINVAL;

        if (sym->st_size != size) {
                pr_err("symbol %s size mismatch: expected %lu actual %u\n",
                       name, (unsigned long)sym->st_size, size);
                return -EINVAL;
        }

        sechdrs = pi->sechdrs;

        if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
                pr_err("symbol %s is in a bss section. Cannot %s\n", name,
                       get_value ? "get" : "set");
                return -EINVAL;
        }

        sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
                  sym->st_value;

        if (get_value)
                memcpy((void *)buf, sym_buf, size);
        else
                memcpy((void *)sym_buf, buf, size);

        return 0;
}
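
/*
 * Usage sketch (illustrative, with a hypothetical purgatory global named
 * "backdoor_addr"): an arch loader that needs to pass a value into
 * purgatory writes it through this helper before the image is booted;
 * the final "false" selects the set operation.
 *
 *      unsigned long addr = some_address;
 *
 *      ret = kexec_purgatory_get_set_symbol(image, "backdoor_addr",
 *                                           &addr, sizeof(addr), false);
 */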