blob: 838cf8a32c49728f12e1a06d248909e23f82703a [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
 *
 * (C) Copyright 2016 Intel Corporation
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * On platforms supporting Intel Turbo Boost Max Technology 3.0 (ITMT),
 * the maximum turbo frequencies of some cores in a CPU package may be
 * higher than for the other cores in the same package.  In that case,
 * better performance can be achieved by making the scheduler prefer
 * to run tasks on the CPUs with higher max turbo frequencies.
 *
 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
 * frequency under ITMT.
 */
18
19#include <linux/sched.h>
20#include <linux/cpumask.h>
21#include <linux/cpuset.h>
Ingo Molnara293b392016-11-28 09:43:49 +010022#include <linux/mutex.h>
Tim Chen5e76b2a2016-11-22 12:23:55 -080023#include <linux/sysctl.h>
24#include <linux/nodemask.h>
25
26static DEFINE_MUTEX(itmt_update_mutex);
27DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
28
29/* Boolean to track if system has ITMT capabilities */
30static bool __read_mostly sched_itmt_capable;
31
Tim Chenf9793e342016-11-22 12:23:56 -080032/*
33 * Boolean to control whether we want to move processes to cpu capable
34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35 * Technology 3.0.
36 *
37 * It can be set via /proc/sys/kernel/sched_itmt_enabled
38 */
39unsigned int __read_mostly sysctl_sched_itmt_enabled;
40
41static int sched_itmt_update_handler(struct ctl_table *table, int write,
42 void __user *buffer, size_t *lenp,
43 loff_t *ppos)
44{
45 unsigned int old_sysctl;
46 int ret;
47
48 mutex_lock(&itmt_update_mutex);
49
50 if (!sched_itmt_capable) {
51 mutex_unlock(&itmt_update_mutex);
52 return -EINVAL;
53 }
54
55 old_sysctl = sysctl_sched_itmt_enabled;
56 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
57
58 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
59 x86_topology_update = true;
60 rebuild_sched_domains();
61 }
62
63 mutex_unlock(&itmt_update_mutex);
64
65 return ret;
66}
67
68static unsigned int zero;
69static unsigned int one = 1;
70static struct ctl_table itmt_kern_table[] = {
71 {
72 .procname = "sched_itmt_enabled",
73 .data = &sysctl_sched_itmt_enabled,
74 .maxlen = sizeof(unsigned int),
75 .mode = 0644,
76 .proc_handler = sched_itmt_update_handler,
77 .extra1 = &zero,
78 .extra2 = &one,
79 },
80 {}
81};
82
83static struct ctl_table itmt_root_table[] = {
84 {
85 .procname = "kernel",
86 .mode = 0555,
87 .child = itmt_kern_table,
88 },
89 {}
90};
91
92static struct ctl_table_header *itmt_sysctl_header;
93
Tim Chen5e76b2a2016-11-22 12:23:55 -080094/**
95 * sched_set_itmt_support() - Indicate platform supports ITMT
96 *
97 * This function is used by the OS to indicate to scheduler that the platform
98 * is capable of supporting the ITMT feature.
99 *
100 * The current scheme has the pstate driver detects if the system
101 * is ITMT capable and call sched_set_itmt_support.
102 *
103 * This must be done only after sched_set_itmt_core_prio
104 * has been called to set the cpus' priorities.
Tim Chenf9793e342016-11-22 12:23:56 -0800105 * It must not be called with cpu hot plug lock
106 * held as we need to acquire the lock to rebuild sched domains
107 * later.
108 *
109 * Return: 0 on success
Tim Chen5e76b2a2016-11-22 12:23:55 -0800110 */
Tim Chenf9793e342016-11-22 12:23:56 -0800111int sched_set_itmt_support(void)
Tim Chen5e76b2a2016-11-22 12:23:55 -0800112{
113 mutex_lock(&itmt_update_mutex);
114
Tim Chenf9793e342016-11-22 12:23:56 -0800115 if (sched_itmt_capable) {
116 mutex_unlock(&itmt_update_mutex);
117 return 0;
118 }
119
120 itmt_sysctl_header = register_sysctl_table(itmt_root_table);
121 if (!itmt_sysctl_header) {
122 mutex_unlock(&itmt_update_mutex);
123 return -ENOMEM;
124 }
125
Tim Chen5e76b2a2016-11-22 12:23:55 -0800126 sched_itmt_capable = true;
127
Tim Chenf9793e342016-11-22 12:23:56 -0800128 sysctl_sched_itmt_enabled = 1;
129
Tim Chen02cfdc92017-01-18 14:30:29 -0800130 x86_topology_update = true;
131 rebuild_sched_domains();
Tim Chenf9793e342016-11-22 12:23:56 -0800132
Tim Chen5e76b2a2016-11-22 12:23:55 -0800133 mutex_unlock(&itmt_update_mutex);
Tim Chenf9793e342016-11-22 12:23:56 -0800134
135 return 0;
Tim Chen5e76b2a2016-11-22 12:23:55 -0800136}
137
138/**
139 * sched_clear_itmt_support() - Revoke platform's support of ITMT
140 *
141 * This function is used by the OS to indicate that it has
142 * revoked the platform's support of ITMT feature.
143 *
Tim Chenf9793e342016-11-22 12:23:56 -0800144 * It must not be called with cpu hot plug lock
145 * held as we need to acquire the lock to rebuild sched domains
146 * later.
Tim Chen5e76b2a2016-11-22 12:23:55 -0800147 */
148void sched_clear_itmt_support(void)
149{
150 mutex_lock(&itmt_update_mutex);
151
Tim Chenf9793e342016-11-22 12:23:56 -0800152 if (!sched_itmt_capable) {
153 mutex_unlock(&itmt_update_mutex);
154 return;
155 }
Tim Chen5e76b2a2016-11-22 12:23:55 -0800156 sched_itmt_capable = false;
157
Tim Chenf9793e342016-11-22 12:23:56 -0800158 if (itmt_sysctl_header) {
159 unregister_sysctl_table(itmt_sysctl_header);
160 itmt_sysctl_header = NULL;
161 }
162
163 if (sysctl_sched_itmt_enabled) {
164 /* disable sched_itmt if we are no longer ITMT capable */
165 sysctl_sched_itmt_enabled = 0;
166 x86_topology_update = true;
167 rebuild_sched_domains();
168 }
169
Tim Chen5e76b2a2016-11-22 12:23:55 -0800170 mutex_unlock(&itmt_update_mutex);
171}
172
173int arch_asym_cpu_priority(int cpu)
174{
175 return per_cpu(sched_core_priority, cpu);
176}
177
178/**
179 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
180 * @prio: Priority of cpu core
181 * @core_cpu: The cpu number associated with the core
182 *
183 * The pstate driver will find out the max boost frequency
184 * and call this function to set a priority proportional
185 * to the max boost frequency. CPU with higher boost
186 * frequency will receive higher priority.
187 *
188 * No need to rebuild sched domain after updating
189 * the CPU priorities. The sched domains have no
190 * dependency on CPU priorities.
191 */
192void sched_set_itmt_core_prio(int prio, int core_cpu)
193{
194 int cpu, i = 1;
195
196 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
197 int smt_prio;
198
199 /*
200 * Ensure that the siblings are moved to the end
201 * of the priority chain and only used when
202 * all other high priority cpus are out of capacity.
203 */
204 smt_prio = prio * smp_num_siblings / i;
205 per_cpu(sched_core_priority, cpu) = smt_prio;
206 i++;
207 }
208}