blob: d85b63211759011607506224bb8cec3d6c0244e6 [file] [log] [blame]
Florian Fainellif6f9be12017-12-01 01:10:09 +01001/*
2 * Broadcom Brahma-B15 CPU read-ahead cache management functions
3 *
4 * Copyright (C) 2015-2016 Broadcom
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/err.h>
12#include <linux/spinlock.h>
13#include <linux/io.h>
14#include <linux/bitops.h>
15#include <linux/of_address.h>
Florian Fainelli55de8872017-12-01 01:10:11 +010016#include <linux/notifier.h>
17#include <linux/cpu.h>
Florian Fainellif6f9be12017-12-01 01:10:09 +010018
19#include <asm/cacheflush.h>
20#include <asm/hardware/cache-b15-rac.h>
21
22extern void v7_flush_kern_cache_all(void);
23
/*
 * Read-ahead cache (RAC) register map. Every offset below is relative to the
 * HIF_CPU_BIUCTRL register base.
 */

/* RAC_CONFIG0 and the fields packed into it */
#define RAC_CONFIG0_REG		(0x78)
#define RACENPREF_MASK		(0x3)
#define RACPREFINST_SHIFT	(0)
#define RACENINST_SHIFT		(2)
#define RACPREFDATA_SHIFT	(4)
#define RACENDATA_SHIFT		(6)
/* Each CPU's fields are found at (cpu * RAC_CPU_SHIFT) within RAC_CONFIG0 */
#define RAC_CPU_SHIFT		(8)
#define RACCFG_MASK		(0xff)

#define RAC_CONFIG1_REG		(0x7c)

/* RAC_FLUSH: FLUSH_RAC is written to start a flush and polled until clear */
#define RAC_FLUSH_REG		(0x80)
#define FLUSH_RAC		(1 << 0)

/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
#define RAC_DATA_INST_EN_MASK	((1 << RACPREFINST_SHIFT) |		\
				 (RACENPREF_MASK << RACENINST_SHIFT) |	\
				 (1 << RACPREFDATA_SHIFT) |		\
				 (RACENPREF_MASK << RACENDATA_SHIFT))

/* Bit number, within b15_rac_flags, recording that the RAC is enabled */
#define RAC_ENABLED		0
44
45static void __iomem *b15_rac_base;
46static DEFINE_SPINLOCK(rac_lock);
Florian Fainelli55de8872017-12-01 01:10:11 +010047static u32 rac_config0_reg;
Florian Fainellif6f9be12017-12-01 01:10:09 +010048
49/* Initialization flag to avoid checking for b15_rac_base, and to prevent
50 * multi-platform kernels from crashing here as well.
51 */
52static unsigned long b15_rac_flags;
53
54static inline u32 __b15_rac_disable(void)
55{
56 u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
57 __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG);
58 dmb();
59 return val;
60}
61
62static inline void __b15_rac_flush(void)
63{
64 u32 reg;
65
66 __raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG);
67 do {
68 /* This dmb() is required to force the Bus Interface Unit
69 * to clean oustanding writes, and forces an idle cycle
70 * to be inserted.
71 */
72 dmb();
73 reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG);
74 } while (reg & FLUSH_RAC);
75}
76
77static inline u32 b15_rac_disable_and_flush(void)
78{
79 u32 reg;
80
81 reg = __b15_rac_disable();
82 __b15_rac_flush();
83 return reg;
84}
85
86static inline void __b15_rac_enable(u32 val)
87{
88 __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG);
89 /* dsb() is required here to be consistent with __flush_icache_all() */
90 dsb();
91}
92
93#define BUILD_RAC_CACHE_OP(name, bar) \
94void b15_flush_##name(void) \
95{ \
96 unsigned int do_flush; \
97 u32 val = 0; \
98 \
99 spin_lock(&rac_lock); \
100 do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \
101 if (do_flush) \
102 val = b15_rac_disable_and_flush(); \
103 v7_flush_##name(); \
104 if (!do_flush) \
105 bar; \
106 else \
107 __b15_rac_enable(val); \
108 spin_unlock(&rac_lock); \
109}
110
111#define nobarrier
112
113/* The readahead cache present in the Brahma-B15 CPU is a special piece of
114 * hardware after the integrated L2 cache of the B15 CPU complex whose purpose
115 * is to prefetch instruction and/or data with a line size of either 64 bytes
116 * or 256 bytes. The rationale is that the data-bus of the CPU interface is
117 * optimized for 256-bytes transactions, and enabling the readahead cache
118 * provides a significant performance boost we want it enabled (typically
119 * twice the performance for a memcpy benchmark application).
120 *
121 * The readahead cache is transparent for Modified Virtual Addresses
122 * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and
123 * DCCIMVAC.
124 *
125 * It is however not transparent for the following cache maintenance
126 * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely
127 * what we are patching here with our BUILD_RAC_CACHE_OP here.
128 */
129BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier);
130
131static void b15_rac_enable(void)
132{
133 unsigned int cpu;
134 u32 enable = 0;
135
136 for_each_possible_cpu(cpu)
137 enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT));
138
139 b15_rac_disable_and_flush();
140 __b15_rac_enable(enable);
141}
142
Florian Fainelli55de8872017-12-01 01:10:11 +0100143#ifdef CONFIG_HOTPLUG_CPU
144/* The CPU hotplug case is the most interesting one, we basically need to make
145 * sure that the RAC is disabled for the entire system prior to having a CPU
146 * die, in particular prior to this dying CPU having exited the coherency
147 * domain.
148 *
149 * Once this CPU is marked dead, we can safely re-enable the RAC for the
150 * remaining CPUs in the system which are still online.
151 *
152 * Offlining a CPU is the problematic case, onlining a CPU is not much of an
153 * issue since the CPU and its cache-level hierarchy will start filling with
154 * the RAC disabled, so L1 and L2 only.
155 *
156 * In this function, we should NOT have to verify any unsafe setting/condition
157 * b15_rac_base:
158 *
 * It is protected by the RAC_ENABLED flag, which is cleared by default and
 * only set once the initial procedure is done. b15_rac_base has been set by
 * that time.
162 *
163 * RAC_ENABLED:
 * There is a small timing window, in b15_rac_init(), between
165 * cpuhp_setup_state_*()
166 * ...
167 * set RAC_ENABLED
168 * However, there is no hotplug activity based on the Linux booting procedure.
169 *
 * Since we have to disable RAC for all cores, we keep RAC on as long as
 * possible (disable it as late as possible) to gain the cache benefit.
172 *
173 * Thus, dying/dead states are chosen here
174 *
 * We are choosing not to disable the RAC on a per-CPU basis here; if we did,
 * we would want to consider disabling it as early as possible to benefit the
 * other active CPUs.
178 */
179
180/* Running on the dying CPU */
181static int b15_rac_dying_cpu(unsigned int cpu)
182{
183 spin_lock(&rac_lock);
184
185 /* Indicate that we are starting a hotplug procedure */
186 __clear_bit(RAC_ENABLED, &b15_rac_flags);
187
188 /* Disable the readahead cache and save its value to a global */
189 rac_config0_reg = b15_rac_disable_and_flush();
190
191 spin_unlock(&rac_lock);
192
193 return 0;
194}
195
196/* Running on a non-dying CPU */
197static int b15_rac_dead_cpu(unsigned int cpu)
198{
199 spin_lock(&rac_lock);
200
201 /* And enable it */
202 __b15_rac_enable(rac_config0_reg);
203 __set_bit(RAC_ENABLED, &b15_rac_flags);
204
205 spin_unlock(&rac_lock);
206
207 return 0;
208}
209#endif /* CONFIG_HOTPLUG_CPU */
210
Florian Fainellif6f9be12017-12-01 01:10:09 +0100211static int __init b15_rac_init(void)
212{
213 struct device_node *dn;
214 int ret = 0, cpu;
215 u32 reg, en_mask = 0;
216
217 dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
218 if (!dn)
219 return -ENODEV;
220
221 if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n"))
222 goto out;
223
224 b15_rac_base = of_iomap(dn, 0);
225 if (!b15_rac_base) {
226 pr_err("failed to remap BIU control base\n");
227 ret = -ENOMEM;
228 goto out;
229 }
230
Florian Fainelli55de8872017-12-01 01:10:11 +0100231#ifdef CONFIG_HOTPLUG_CPU
232 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
233 "arm/cache-b15-rac:dead",
234 NULL, b15_rac_dead_cpu);
235 if (ret)
236 goto out_unmap;
237
238 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
239 "arm/cache-b15-rac:dying",
240 NULL, b15_rac_dying_cpu);
241 if (ret)
242 goto out_cpu_dead;
243#endif
244
Florian Fainellif6f9be12017-12-01 01:10:09 +0100245 spin_lock(&rac_lock);
246 reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
247 for_each_possible_cpu(cpu)
248 en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT));
249 WARN(reg & en_mask, "Read-ahead cache not previously disabled\n");
250
251 b15_rac_enable();
252 set_bit(RAC_ENABLED, &b15_rac_flags);
253 spin_unlock(&rac_lock);
254
255 pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n",
256 b15_rac_base + RAC_CONFIG0_REG);
257
Florian Fainelli55de8872017-12-01 01:10:11 +0100258 goto out;
259
260out_cpu_dead:
261 cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING);
262out_unmap:
263 iounmap(b15_rac_base);
Florian Fainellif6f9be12017-12-01 01:10:09 +0100264out:
265 of_node_put(dn);
266 return ret;
267}
268arch_initcall(b15_rac_init);