#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fixmap.h>

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int __initdata_or_module noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, ...)				\
do {							\
	if (debug_alternative)				\
		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
} while (0)

/*
 * Each GENERIC_NOPX is X bytes long and is defined as an array of bytes
 * that encode that nop.  To get from one nop to the next, add to the
 * base of the array an offset equal to the sum of the sizes of all the
 * nops preceding the one we are after.
 *
 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 * nice symmetry of sizes of the previous nops.
 */
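/*
 * For example, intel_nops[3] below is intelnops + 1 + 2, i.e. the
 * address just past GENERIC_NOP1 and GENERIC_NOP2, which is where the
 * 3-byte GENERIC_NOP3 sequence starts.
 */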
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char intelnops[] =
{
	GENERIC_NOP1,
	GENERIC_NOP2,
	GENERIC_NOP3,
	GENERIC_NOP4,
	GENERIC_NOP5,
	GENERIC_NOP6,
	GENERIC_NOP7,
	GENERIC_NOP8,
	GENERIC_NOP5_ATOMIC
};
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef K8_NOP1
static const unsigned char k8nops[] =
{
	K8_NOP1,
	K8_NOP2,
	K8_NOP3,
	K8_NOP4,
	K8_NOP5,
	K8_NOP6,
	K8_NOP7,
	K8_NOP8,
	K8_NOP5_ATOMIC
};
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char k7nops[] =
{
	K7_NOP1,
	K7_NOP2,
	K7_NOP3,
	K7_NOP4,
	K7_NOP5,
	K7_NOP6,
	K7_NOP7,
	K7_NOP8,
	K7_NOP5_ATOMIC
};
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef P6_NOP1
static const unsigned char p6nops[] =
{
	P6_NOP1,
	P6_NOP2,
	P6_NOP3,
	P6_NOP4,
	P6_NOP5,
	P6_NOP6,
	P6_NOP7,
	P6_NOP8,
	P6_NOP5_ATOMIC
};
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
	NULL,
	p6nops,
	p6nops + 1,
	p6nops + 1 + 2,
	p6nops + 1 + 2 + 3,
	p6nops + 1 + 2 + 3 + 4,
	p6nops + 1 + 2 + 3 + 4 + 5,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

/* Initialize these to a safe default */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif

void __init arch_init_ideal_nops(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_INTEL:
		/*
		 * Due to a decoder implementation quirk, some
		 * specific Intel CPUs actually perform better with
		 * the "k8_nops" than with the SDM-recommended NOPs.
		 */
		if (boot_cpu_data.x86 == 6 &&
		    boot_cpu_data.x86_model >= 0x0f &&
		    boot_cpu_data.x86_model != 0x1c &&
		    boot_cpu_data.x86_model != 0x26 &&
		    boot_cpu_data.x86_model != 0x27 &&
		    boot_cpu_data.x86_model < 0x30) {
			ideal_nops = k8_nops;
		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
			ideal_nops = p6_nops;
		} else {
#ifdef CONFIG_X86_64
			ideal_nops = k8_nops;
#else
			ideal_nops = intel_nops;
#endif
		}
		break;
	default:
#ifdef CONFIG_X86_64
		ideal_nops = k8_nops;
#else
		if (boot_cpu_has(X86_FEATURE_K8))
			ideal_nops = k8_nops;
		else if (boot_cpu_has(X86_FEATURE_K7))
			ideal_nops = k7_nops;
		else
			ideal_nops = intel_nops;
#endif
	}
}

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, ideal_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

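/*
 * Usage sketch for add_nops() (the buffer sizes and names here are made
 * up): when only 2 bytes of a 7-byte patch site carry real code, pad the
 * remainder with ideal NOPs before poking the whole buffer in:
 *
 *	char buf[7];
 *	memcpy(buf, repl, 2);		// 2 bytes of replacement code
 *	add_nops(buf + 2, 5);		// one ideal_nops[5] fills the rest
 *	text_poke_early(site, buf, 7);
 */
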
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

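/*
 * Sketch of the struct alt_instr layout assumed below (the authoritative
 * definition lives in <asm/alternative.h>):
 *
 *	struct alt_instr {
 *		s32 instr_offset;	// original insn, relative to this field
 *		s32 repl_offset;	// replacement, relative to this field
 *		u16 cpuid;		// cpufeature bit selecting the replacement
 *		u8  instrlen;		// length of the original instruction
 *		u8  replacementlen;	// length of the replacement, <= instrlen
 *	};
 *
 * The offsets are relative, so .altinstructions needs no relocation.
 */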
void __init_or_module apply_alternatives(struct alt_instr *start,
					 struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insnbuf[MAX_PATCH_LEN];

	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
	/*
	 * The scan order should be from start to end. A later scanned
	 * alternative will overwrite code patched by an earlier one.
	 * Some kernel functions (e.g. memcpy, memset) rely on this
	 * ordering when they are patched.
	 *
	 * So be careful if you want to change the scan order to any other
	 * order.
	 */
	for (a = start; a < end; a++) {
		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->replacementlen > a->instrlen);
		BUG_ON(a->instrlen > sizeof(insnbuf));
		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
		if (!boot_cpu_has(a->cpuid))
			continue;

		memcpy(insnbuf, replacement, a->replacementlen);

		/* 0xe8 is a relative CALL; fix up the displacement. */
		if (*insnbuf == 0xe8 && a->replacementlen == 5)
			*(s32 *)(insnbuf + 1) += replacement - instr;

		add_nops(insnbuf + a->replacementlen,
			 a->instrlen - a->replacementlen);

		text_poke_early(instr, insnbuf, a->instrlen);
	}
}

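/*
 * Illustration of the 0xe8 fixup above: the rel32 stored in
 * .altinstr_replacement was computed for the replacement's own location.
 * Copying the instruction to the original site shifts its source address
 * by (instr - replacement), so the displacement must grow by exactly
 * (replacement - instr) for the call to still reach the same target.
 */
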
#ifdef CONFIG_SMP

static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
	mutex_unlock(&text_mutex);
}

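/*
 * Byte-level sketch: "lock incl (%rax)" encodes as f0 ff 00 while
 * "ds incl (%rax)" encodes as 3e ff 00, so flipping the single prefix
 * byte between 0xf0 and 0x3e toggles the LOCK semantics without
 * changing the instruction's length.
 */
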
static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	mutex_lock(&text_mutex);
	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
	mutex_unlock(&text_mutex);
}

struct smp_alt_module {
	/* the module owning these lock prefixes (NULL for the core kernel) */
	struct module *mod;
	char *name;

	/* ptrs to lock prefixes */
	const s32 *locks;
	const s32 *locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8 *text;
	u8 *text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static bool uniproc_patched = false;	/* protected by smp_alt */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text, void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&smp_alt);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		/* we'll run the (safe but slow) SMP code then ... */
		goto unlock;

	smp->mod = mod;
	smp->name = name;
	smp->locks = locks;
	smp->locks_end = locks_end;
	smp->text = text;
	smp->text_end = text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__func__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&smp_alt);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&smp_alt);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&smp_alt);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
	/*
	 * A section-handling bug in older versions of binutils prevented
	 * alternatives-replacement from working reliably.
	 *
	 * If this still occurs then you should see a hang
	 * or crash shortly after this line:
	 */
	pr_info("lockdep: fixing up alternatives\n");
#endif

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&smp_alt);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&smp_alt);
}

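/*
 * Note: alternatives_enable_smp() is meant to be invoked from the CPU
 * hotplug path when a second CPU is first brought up, so the UP
 * optimization above is undone exactly once.
 */
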
/* Return 1 if the address range is reserved for smp-alternatives */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
#endif

#ifdef CONFIG_PARAVIRT
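/*
 * Sketch of what apply_paravirt() below does: each patch site starts
 * life as an indirect call into the pv_*_ops tables.  pv_init_ops.patch()
 * may rewrite the site in place (e.g. the native backend can inline a
 * plain "sti" for the irq-enable hook), returning how many bytes it
 * emitted; the remainder of the site is then padded with NOPs.
 */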
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insnbuf[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insnbuf, p->instr, p->len);
		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
					 (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, p->len - used);
		text_poke_early(p->instr, insnbuf, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
	/* The patching is not fully atomic, so try to avoid local
	   interruptions that might execute the to-be-patched code.
	   Other CPUs are not running. */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other CPUs are not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
#endif

	apply_paravirt(__parainstructions, __parainstructions_end);

	restart_nmi();
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI
 * or MCE handlers seeing an inconsistent instruction while you patch.
 */
void *__init_or_module text_poke_early(void *addr, const void *opcode,
				       size_t len)
{
	unsigned long flags;
	local_irq_save(flags);
	memcpy(addr, opcode, len);
	sync_core();
	local_irq_restore(flags);
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be
 * aligned in a way that permits an atomic write. It also makes sure we fit
 * on a single page.
 *
 * Note: Must be called under text_mutex.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;
	char *vaddr;
	struct page *pages[2];
	int i;

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	BUG_ON(!pages[0]);
	local_irq_save(flags);
	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
	if (pages[1])
		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
	clear_fixmap(FIX_TEXT_POKE0);
	if (pages[1])
		clear_fixmap(FIX_TEXT_POKE1);
	local_flush_tlb();
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
	for (i = 0; i < len; i++)
		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
	local_irq_restore(flags);
	return addr;
}

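/*
 * Typical call sequence (a sketch):
 *
 *	mutex_lock(&text_mutex);
 *	text_poke(site, new_insn, len);
 *	mutex_unlock(&text_mutex);
 *
 * The write goes through a temporary fixmap alias of the target pages,
 * which is what lets this work even when kernel text is mapped read-only.
 */
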
static void do_sync_core(void *info)
{
	sync_core();
}

static bool bp_patching_in_progress;
static void *bp_int3_handler, *bp_int3_addr;

static int int3_notify(struct notifier_block *self, unsigned long val, void *data)
{
	struct die_args *args = data;

	/* read barrier: pairs with the smp_wmb() in text_poke_bp() that
	   sets bp_patching_in_progress */
	smp_rmb();

	if (likely(!bp_patching_in_progress))
		return NOTIFY_DONE;

	/* we are not interested in non-int3 faults and ring > 0 faults */
	if (val != DIE_INT3 || !args->regs || user_mode_vm(args->regs)
			    || args->regs->ip != (unsigned long)bp_int3_addr)
		return NOTIFY_DONE;

	/* set up the specified breakpoint handler */
	args->regs->ip = (unsigned long) bp_int3_handler;

	return NOTIFY_STOP;
}

/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr: address to patch
 * @opcode: opcode of new instruction
 * @len: length to copy
 * @handler: address to jump to when the temporary breakpoint is hit
 *
 * Modify multi-byte instruction by using int3 breakpoint on SMP.
 * In contrast to text_poke_smp(), we completely avoid stop_machine() here,
 * and achieve the synchronization using int3 breakpoint.
 *
 * The way it is done:
 *	- add an int3 trap to the address that will be patched
 *	- sync cores
 *	- update all but the first byte of the patched range
 *	- sync cores
 *	- replace the first byte (int3) by the first byte of
 *	  replacing opcode
 *	- sync cores
 *
 * Note: must be called under text_mutex.
 */
void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
	unsigned char int3 = 0xcc;

	bp_int3_handler = handler;
	bp_int3_addr = (u8 *)addr + sizeof(int3);
	bp_patching_in_progress = true;
	/*
	 * Corresponding read barrier in int3 notifier for
	 * making sure the in_progress flag is correctly ordered wrt.
	 * patching
	 */
	smp_wmb();

	text_poke(addr, &int3, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	if (len - sizeof(int3) > 0) {
		/* patch all but the first byte */
		text_poke((char *)addr + sizeof(int3),
			  (const char *) opcode + sizeof(int3),
			  len - sizeof(int3));
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		on_each_cpu(do_sync_core, NULL, 1);
	}

	/* patch the first byte */
	text_poke(addr, opcode, sizeof(int3));

	on_each_cpu(do_sync_core, NULL, 1);

	bp_patching_in_progress = false;
	smp_wmb();

	return addr;
}

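/*
 * Usage sketch: replacing a 5-byte NOP at "site" with a 5-byte relative
 * jump, the way a jump-label style patcher might (names hypothetical):
 *
 *	unsigned char jmp[5] = { 0xe9, 0, 0, 0, 0 };
 *	*(s32 *)&jmp[1] = (s32)(target - (site + 5));
 *	text_poke_bp(site, jmp, 5, target);
 *
 * A CPU hitting the transient int3 is diverted to @handler (the jump
 * target here), so it sees either the old or the new behaviour, never a
 * half-written instruction.
 */
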
/*
 * This one needs to run before anything else handles it as a
 * regular exception.
 */
static struct notifier_block int3_nb = {
	.priority = 0x7fffffff,
	.notifier_call = int3_notify
};

static int __init int3_init(void)
{
	return register_die_notifier(&int3_nb);
}

arch_initcall(int3_init);

/*
 * Cross-modifying kernel text with stop_machine().
 * This code originally comes from immediate value.
 */
static atomic_t stop_machine_first;
static int wrote_text;

struct text_poke_params {
	struct text_poke_param *params;
	int nparams;
};

static int __kprobes stop_machine_text_poke(void *data)
{
	struct text_poke_params *tpp = data;
	struct text_poke_param *p;
	int i;

	if (atomic_xchg(&stop_machine_first, 0)) {
		for (i = 0; i < tpp->nparams; i++) {
			p = &tpp->params[i];
			text_poke(p->addr, p->opcode, p->len);
		}
		smp_wmb();	/* Make sure other cpus see that this has run */
		wrote_text = 1;
	} else {
		while (!wrote_text)
			cpu_relax();
		smp_mb();	/* Load wrote_text before following execution */
	}

	for (i = 0; i < tpp->nparams; i++) {
		p = &tpp->params[i];
		flush_icache_range((unsigned long)p->addr,
				   (unsigned long)p->addr + p->len);
	}
	/*
	 * Intel Architecture Software Developer's Manual section 7.1.3
	 * specifies that a core serializing instruction such as "cpuid"
	 * should be executed on _each_ core before the new instruction is
	 * made visible.
	 */
	sync_core();
	return 0;
}

/**
 * text_poke_smp - Update instructions on a live kernel on SMP
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
 * should be allowed, since stop_machine() does _not_ protect code against
 * NMI and MCE.
 *
 * Note: Must be called under get_online_cpus() and text_mutex.
 */
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
	struct text_poke_params tpp;
	struct text_poke_param p;

	p.addr = addr;
	p.opcode = opcode;
	p.len = len;
	tpp.params = &p;
	tpp.nparams = 1;
	atomic_set(&stop_machine_first, 1);
	wrote_text = 0;
	/* Use __stop_machine() because the caller already got online_cpus. */
	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
	return addr;
}

/**
 * text_poke_smp_batch - Update instructions on a live kernel on SMP
 * @params: an array of text_poke parameters
 * @n: the number of elements in params.
 *
 * Modify multi-byte instruction by using stop_machine() on SMP. Since
 * stop_machine() is a heavy task, it is better to aggregate text_poke
 * requests and do them all at once if possible.
 *
 * Note: Must be called under get_online_cpus() and text_mutex.
 */
void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
{
	struct text_poke_params tpp = {.params = params, .nparams = n};

	atomic_set(&stop_machine_first, 1);
	wrote_text = 0;
	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
}
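
/*
 * Usage sketch for the batch interface (sites and buffers hypothetical):
 *
 *	struct text_poke_param ps[2] = {
 *		{ .addr = site1, .opcode = buf1, .len = len1 },
 *		{ .addr = site2, .opcode = buf2, .len = len2 },
 *	};
 *
 *	get_online_cpus();
 *	mutex_lock(&text_mutex);
 *	text_poke_smp_batch(ps, 2);
 *	mutex_unlock(&text_mutex);
 *	put_online_cpus();
 *
 * Both sites are then patched inside a single stop_machine() window.
 */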
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900798}