// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fixmap.h>

int __read_mostly alternatives_patched;

EXPORT_SYMBOL_GPL(alternatives_patched);

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#define DPRINTK(fmt, args...)						\
do {									\
	if (debug_alternative)						\
		printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);	\
} while (0)

#define DUMP_BYTES(buf, len, fmt, args...)				\
do {									\
	if (unlikely(debug_alternative)) {				\
		int j;							\
									\
		if (!(len))						\
			break;						\
									\
		printk(KERN_DEBUG fmt, ##args);				\
		for (j = 0; j < (len) - 1; j++)				\
			printk(KERN_CONT "%02hhx ", buf[j]);		\
		printk(KERN_CONT "%02hhx\n", buf[j]);			\
	}								\
} while (0)
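/*
 * With "debug-alternative" on the command line, DUMP_BYTES() prints the
 * buffer as space-separated hex bytes after the given prefix, so a line
 * for a 5-byte patch site looks roughly like (address and bytes purely
 * illustrative):
 *
 *	ffffffff81000000: old_insn: 0f 1f 44 00 00
 */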

/*
 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
 * that correspond to that nop. Getting from one nop to the next, we
 * add to the array the offset that is equal to the sum of all sizes of
 * nops preceding the one we are after.
 *
 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 * nice symmetry of sizes of the previous nops.
 */
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char intelnops[] =
{
	GENERIC_NOP1,
	GENERIC_NOP2,
	GENERIC_NOP3,
	GENERIC_NOP4,
	GENERIC_NOP5,
	GENERIC_NOP6,
	GENERIC_NOP7,
	GENERIC_NOP8,
	GENERIC_NOP5_ATOMIC
};
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef K8_NOP1
static const unsigned char k8nops[] =
{
	K8_NOP1,
	K8_NOP2,
	K8_NOP3,
	K8_NOP4,
	K8_NOP5,
	K8_NOP6,
	K8_NOP7,
	K8_NOP8,
	K8_NOP5_ATOMIC
};
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char k7nops[] =
{
	K7_NOP1,
	K7_NOP2,
	K7_NOP3,
	K7_NOP4,
	K7_NOP5,
	K7_NOP6,
	K7_NOP7,
	K7_NOP8,
	K7_NOP5_ATOMIC
};
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef P6_NOP1
static const unsigned char p6nops[] =
{
	P6_NOP1,
	P6_NOP2,
	P6_NOP3,
	P6_NOP4,
	P6_NOP5,
	P6_NOP6,
	P6_NOP7,
	P6_NOP8,
	P6_NOP5_ATOMIC
};
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
	NULL,
	p6nops,
	p6nops + 1,
	p6nops + 1 + 2,
	p6nops + 1 + 2 + 3,
	p6nops + 1 + 2 + 3 + 4,
	p6nops + 1 + 2 + 3 + 4 + 5,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif
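/*
 * The tables above differ only in which byte patterns they use: the P6
 * flavour uses the long "0F 1F /0" NOPs (the 5-byte form, for instance,
 * is 0f 1f 44 00 00), while the K7/K8 flavours appear to build longer
 * NOPs out of prefixed short ones; the exact encodings are the *_NOP*
 * macros from <asm/nops.h>.
 */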

/* Initialize these to a safe default */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif

void __init arch_init_ideal_nops(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		/*
		 * Due to a decoder implementation quirk, some
		 * specific Intel CPUs actually perform better with
		 * the "k8_nops" than with the SDM-recommended NOPs.
		 */
		if (boot_cpu_data.x86 == 6 &&
		    boot_cpu_data.x86_model >= 0x0f &&
		    boot_cpu_data.x86_model != 0x1c &&
		    boot_cpu_data.x86_model != 0x26 &&
		    boot_cpu_data.x86_model != 0x27 &&
		    boot_cpu_data.x86_model < 0x30) {
			ideal_nops = k8_nops;
		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
			ideal_nops = p6_nops;
		} else {
#ifdef CONFIG_X86_64
			ideal_nops = k8_nops;
#else
			ideal_nops = intel_nops;
#endif
		}
		break;

	case X86_VENDOR_HYGON:
		ideal_nops = p6_nops;
		return;

	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 > 0xf) {
			ideal_nops = p6_nops;
			return;
		}

		/* fall through */

	default:
#ifdef CONFIG_X86_64
		ideal_nops = k8_nops;
#else
		if (boot_cpu_has(X86_FEATURE_K8))
			ideal_nops = k8_nops;
		else if (boot_cpu_has(X86_FEATURE_K7))
			ideal_nops = k7_nops;
		else
			ideal_nops = intel_nops;
#endif
	}
}

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, ideal_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}
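/*
 * add_nops() greedily picks the largest ideal NOP that still fits, so
 * padding 11 bytes with ASM_NOP_MAX == 8 emits one 8-byte NOP followed by
 * one 3-byte NOP rather than eleven single-byte 0x90s.
 */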

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);

/*
 * Are we looking at a near JMP with a 1 or 4-byte displacement?
 */
static inline bool is_jmp(const u8 opcode)
{
	return opcode == 0xeb || opcode == 0xe9;
}

static void __init_or_module
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
{
	u8 *next_rip, *tgt_rip;
	s32 n_dspl, o_dspl;
	int repl_len;

	if (a->replacementlen != 5)
		return;

	o_dspl = *(s32 *)(insnbuf + 1);

	/* next_rip of the replacement JMP */
	next_rip = repl_insn + a->replacementlen;
	/* target rip of the replacement JMP */
	tgt_rip  = next_rip + o_dspl;
	n_dspl = tgt_rip - orig_insn;

	DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);

	if (tgt_rip - orig_insn >= 0) {
		if (n_dspl - 2 <= 127)
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	/* negative offset */
	} else {
		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	}

two_byte_jmp:
	n_dspl -= 2;

	insnbuf[0] = 0xeb;
	insnbuf[1] = (s8)n_dspl;
	add_nops(insnbuf + 2, 3);

	repl_len = 2;
	goto done;

five_byte_jmp:
	n_dspl -= 5;

	insnbuf[0] = 0xe9;
	*(s32 *)&insnbuf[1] = n_dspl;

	repl_len = 5;

done:

	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
}
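/*
 * For example: when the recomputed displacement is non-negative and still
 * fits an 8-bit immediate (n_dspl - 2 <= 127), the 5-byte replacement JMP
 * is shrunk above into a short "EB xx" jump plus three NOPs (repl_len == 2);
 * every other case keeps the near "E9 rel32" form (repl_len == 5).
 */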

/*
 * "noinline" to cause control flow change and thus invalidate I$ and
 * cause refetch after modification.
 */
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
	unsigned long flags;
	int i;

	for (i = 0; i < a->padlen; i++) {
		if (instr[i] != 0x90)
			return;
	}

	local_irq_save(flags);
	add_nops(instr + (a->instrlen - a->padlen), a->padlen);
	local_irq_restore(flags);

	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
		   instr, a->instrlen - a->padlen, a->padlen);
}
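/*
 * The padding rewritten above is the run of single-byte 0x90 NOPs that the
 * ALTERNATIVE() macros emit after the original instruction; when the
 * feature bit is not set, optimize_nops() merely replaces that padding with
 * the longer ideal_nops[] forms so the CPU has fewer instructions to decode.
 */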

/*
 * Replace instructions with better alternatives for this CPU type. This runs
 * before SMP is initialized to avoid SMP problems with self modifying code.
 * This implies that asymmetric systems where APs have fewer capabilities than
 * the boot processor are not handled. Tough. Make sure you disable such
 * features by hand.
 *
 * Marked "noinline" to cause control flow change and thus insn cache
 * to refetch changed I$ lines.
 */
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
						  struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insnbuf[MAX_PATCH_LEN];

	DPRINTK("alt table %px, -> %px", start, end);
	/*
	 * The scan order should be from start to end. Alternatives scanned
	 * later can overwrite code patched by alternatives scanned earlier.
	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
	 * patch code.
	 *
	 * So be careful if you want to change the scan order to any other
	 * order.
	 */
	for (a = start; a < end; a++) {
		int insnbuf_sz = 0;

		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->instrlen > sizeof(insnbuf));
		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
		if (!boot_cpu_has(a->cpuid)) {
			if (a->padlen > 1)
				optimize_nops(a, instr);

			continue;
		}

		DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
			a->cpuid >> 5,
			a->cpuid & 0x1f,
			instr, instr, a->instrlen,
			replacement, a->replacementlen, a->padlen);

		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);

		memcpy(insnbuf, replacement, a->replacementlen);
		insnbuf_sz = a->replacementlen;

		/*
		 * 0xe8 is a relative CALL; fix the offset.
		 *
		 * Instruction length is checked before the opcode to avoid
		 * accessing uninitialized bytes for zero-length replacements.
		 */
		if (a->replacementlen == 5 && *insnbuf == 0xe8) {
			*(s32 *)(insnbuf + 1) += replacement - instr;
			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
				*(s32 *)(insnbuf + 1),
				(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
		}

		if (a->replacementlen && is_jmp(replacement[0]))
			recompute_jump(a, instr, replacement, insnbuf);

		if (a->instrlen > a->replacementlen) {
			add_nops(insnbuf + a->replacementlen,
				 a->instrlen - a->replacementlen);
			insnbuf_sz += a->instrlen - a->replacementlen;
		}
		DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);

		text_poke_early(instr, insnbuf, insnbuf_sz);
	}
}
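/*
 * The alt_instr entries walked here are typically generated by the
 * ALTERNATIVE() macro from <asm/alternative.h>, used roughly like
 * (the feature bit is made up for illustration):
 *
 *	asm volatile(ALTERNATIVE("rep; movsb", "call some_faster_copy",
 *				 X86_FEATURE_SOMETHING));
 *
 * which records the two instruction sequences and the feature bit in the
 * .altinstructions/.altinstr_replacement sections consumed above.
 */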

#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
}
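/*
 * Both helpers above flip a single prefix byte at each recorded site:
 * 0xf0 is the LOCK prefix and 0x3e is a DS segment override that has no
 * effect on these accesses, so e.g. "lock; incl (%rdx)" becomes a plain
 * "ds; incl (%rdx)" while only one CPU is online, and is switched back
 * when another CPU appears.
 */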

struct smp_alt_module {
	/* the module these lock-prefix sites belong to; NULL for the core kernel */
	struct module *mod;
	char *name;

	/* ptrs to lock prefixes */
	const s32 *locks;
	const s32 *locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8 *text;
	u8 *text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static bool uniproc_patched = false;	/* protected by text_mutex */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text,  void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&text_mutex);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (!smp)
		/* we'll run the (safe but slow) SMP code then ... */
		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
		smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&text_mutex);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&text_mutex);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&text_mutex);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&text_mutex);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&text_mutex);
}

/*
 * Return 1 if the address range is reserved for SMP-alternatives.
 * Must hold text_mutex.
 */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	lockdep_assert_held(&text_mutex);

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insnbuf[MAX_PATCH_LEN];

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insnbuf, p->instr, p->len);
		used = pv_ops.init.patch(p->instrtype, insnbuf,
					 (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, p->len - used);
		text_poke_early(p->instr, insnbuf, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
	/*
	 * The patching is not fully atomic, so try to avoid local
	 * interruptions that might execute the code being patched.
	 * Other CPUs are not running.
	 */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other cpus not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
#endif

	apply_paravirt(__parainstructions, __parainstructions_end);

	restart_nmi();
	alternatives_patched = 1;
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void __init_or_module text_poke_early(void *addr, const void *opcode,
				      size_t len)
{
	unsigned long flags;

	if (boot_cpu_has(X86_FEATURE_NX) &&
	    is_module_text_address((unsigned long)addr)) {
		/*
		 * Module text is marked initially as non-executable, so the
		 * code cannot be running and speculative code-fetches are
		 * prevented. Just change the code.
		 */
		memcpy(addr, opcode, len);
	} else {
		local_irq_save(flags);
		memcpy(addr, opcode, len);
		local_irq_restore(flags);
		sync_core();

		/*
		 * Could also do a CLFLUSH here to speed up CPU recovery; but
		 * that causes hangs on some VIA CPUs.
		 */
	}
}

__ro_after_init struct mm_struct *poking_mm;
__ro_after_init unsigned long poking_addr;

static void *__text_poke(void *addr, const void *opcode, size_t len)
{
	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
	struct page *pages[2] = {NULL};
	temp_mm_state_t prev;
	unsigned long flags;
	pte_t pte, *ptep;
	spinlock_t *ptl;
	pgprot_t pgprot;

	/*
	 * While the boot memory allocator is running we cannot use struct
	 * pages as they are not yet initialized. There is no way to recover.
	 */
	BUG_ON(!after_bootmem);

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		if (cross_page_boundary)
			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		if (cross_page_boundary)
			pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	/*
	 * If something went wrong, crash and burn since recovery paths are not
	 * implemented.
	 */
	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));

	local_irq_save(flags);

	/*
	 * Map the page without the global bit, as TLB flushing is done with
	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
	 */
	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);

	/*
	 * The lock is not really needed, but this allows us to avoid
	 * open-coding.
	 */
	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);

	/*
	 * This must not fail; preallocated in poking_init().
	 */
	VM_BUG_ON(!ptep);

	pte = mk_pte(pages[0], pgprot);
	set_pte_at(poking_mm, poking_addr, ptep, pte);

	if (cross_page_boundary) {
		pte = mk_pte(pages[1], pgprot);
		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
	}

	/*
	 * Loading the temporary mm behaves as a compiler barrier, which
	 * guarantees that the PTE will be set at the time memcpy() is done.
	 */
	prev = use_temporary_mm(poking_mm);

	kasan_disable_current();
	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
	kasan_enable_current();

	/*
	 * Ensure that the PTE is only cleared after the instructions of memcpy
	 * were issued by using a compiler barrier.
	 */
	barrier();

	pte_clear(poking_mm, poking_addr, ptep);
	if (cross_page_boundary)
		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);

	/*
	 * Loading the previous page-table hierarchy requires a serializing
	 * instruction that already allows the core to see the updated version.
	 * Xen-PV is assumed to serialize execution in a similar manner.
	 */
	unuse_temporary_mm(prev);

	/*
	 * Flushing the TLB might involve IPIs, which would require enabled
	 * IRQs, but not if the mm is not used, as is the case at this point.
	 */
	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
			   PAGE_SHIFT, false);

	/*
	 * If the text does not match what we just wrote then something is
	 * fundamentally screwy; there's nothing we can really do about that.
	 */
	BUG_ON(memcmp(addr, opcode, len));

	pte_unmap_unlock(ptep, ptl);
	local_irq_restore(flags);
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Note that the caller must ensure that if the modified code is part of a
 * module, the module would not be removed during poking. This can be achieved
 * by registering a module notifier, and ordering module removal and patching
 * through a mutex.
 */
void *text_poke(void *addr, const void *opcode, size_t len)
{
	lockdep_assert_held(&text_mutex);

	return __text_poke(addr, opcode, len);
}

/**
 * text_poke_kgdb - Update instructions on a live kernel by kgdb
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Context: should only be used by kgdb, which ensures no other core is running,
 * despite the fact it does not hold the text_mutex.
 */
void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
{
	return __text_poke(addr, opcode, len);
}
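/*
 * Rough guide to the three poking interfaces: text_poke_early() is for
 * boot/module-load time when the code cannot yet run, text_poke() is the
 * normal path and insists on text_mutex being held, and text_poke_kgdb()
 * is the same operation for kgdb, which stops the other CPUs instead of
 * taking the mutex.
 */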

static void do_sync_core(void *info)
{
	sync_core();
}

static bool bp_patching_in_progress;
static void *bp_int3_handler, *bp_int3_addr;

int poke_int3_handler(struct pt_regs *regs)
{
	/*
	 * Having observed our INT3 instruction, we now must observe
	 * bp_patching_in_progress.
	 *
	 *	in_progress = TRUE		INT3
	 *	WMB				RMB
	 *	write INT3			if (in_progress)
	 *
	 * Idem for bp_int3_handler.
	 */
	smp_rmb();

	if (likely(!bp_patching_in_progress))
		return 0;

	if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
		return 0;

	/* set up the specified breakpoint handler */
	regs->ip = (unsigned long) bp_int3_handler;

	return 1;
}
NOKPROBE_SYMBOL(poke_int3_handler);

/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr:	address to patch
 * @opcode:	opcode of new instruction
 * @len:	length to copy
 * @handler:	address to jump to when the temporary breakpoint is hit
 *
 * Modify multi-byte instruction by using int3 breakpoint on SMP.
 * We completely avoid stop_machine() here, and achieve the
 * synchronization using int3 breakpoint.
 *
 * The way it is done:
 *	- add an int3 trap to the address that will be patched
 *	- sync cores
 *	- update all but the first byte of the patched range
 *	- sync cores
 *	- replace the first byte (int3) by the first byte of
 *	  replacing opcode
 *	- sync cores
 */
void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
	unsigned char int3 = 0xcc;

	bp_int3_handler = handler;
	bp_int3_addr = (u8 *)addr + sizeof(int3);
	bp_patching_in_progress = true;

	lockdep_assert_held(&text_mutex);

	/*
	 * Corresponding read barrier in int3 notifier for making sure the
	 * in_progress and handler are correctly ordered wrt. patching.
	 */
	smp_wmb();

	text_poke(addr, &int3, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	if (len - sizeof(int3) > 0) {
		/* patch all but the first byte */
		text_poke((char *)addr + sizeof(int3),
			  (const char *) opcode + sizeof(int3),
			  len - sizeof(int3));
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		on_each_cpu(do_sync_core, NULL, 1);
	}

	/* patch the first byte */
	text_poke(addr, opcode, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);
	/*
	 * sync_core() implies an smp_mb() and orders this store against
	 * the writing of the new instruction.
	 */
	bp_patching_in_progress = false;
}
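/*
 * As an approximate usage sketch, a caller replacing a 5-byte NOP with a
 * 5-byte JMP might do something like:
 *
 *	u8 jmp[5] = { 0xe9, };			// rel32 filled in by the caller
 *	text_poke_bp(ip, jmp, 5, ip + 5);	// resume after the insn if hit
 *
 * i.e. the handler is simply the address of the next instruction, which is
 * how the jump label code uses this interface.
 */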