Blame - kernel/sched/deadline.c - github.com/raspberrypi/raspberrypi-kernel

blob: d80523fb1de5baac74e7af2858337fd9dfd1f93d [file] [log] [blame]

Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1	/*
				2	* Deadline Scheduling Class (SCHED_DEADLINE)
				3	*
				4	* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
				5	*
				6	* Tasks that periodically execute their instances for less than their
				7	* runtime won't miss any of their deadlines.
				8	* Tasks that are not periodic or sporadic, or that try to execute more
				9	* than their reserved bandwidth, will be slowed down (and may potentially
				10	* miss some of their deadlines), and won't affect any other task.
				11	*
				12	* Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	13	* Juri Lelli <juri.lelli@gmail.com>,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	14	* Michael Trimarchi <michael@amarulasolutions.com>,
				15	* Fabio Checconi <fchecconi@gmail.com>
				16	*/
				17	#include "sched.h"
				18
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	19	#include <linux/slab.h>
				20
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	21	struct dl_bandwidth def_dl_bandwidth;
				22
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	23	static inline struct task_struct dl_task_of(struct sched_dl_entity dl_se)
				24	{
				25	return container_of(dl_se, struct task_struct, dl);
				26	}
				27
				28	static inline struct rq rq_of_dl_rq(struct dl_rq dl_rq)
				29	{
				30	return container_of(dl_rq, struct rq, dl);
				31	}
				32
				33	static inline struct dl_rq dl_rq_of_se(struct sched_dl_entity dl_se)
				34	{
				35	struct task_struct *p = dl_task_of(dl_se);
				36	struct rq *rq = task_rq(p);
				37
				38	return &rq->dl;
				39	}
				40
				41	static inline int on_dl_rq(struct sched_dl_entity *dl_se)
				42	{
				43	return !RB_EMPTY_NODE(&dl_se->rb_node);
				44	}
				45
				46	static inline int is_leftmost(struct task_struct p, struct dl_rq dl_rq)
				47	{
				48	struct sched_dl_entity *dl_se = &p->dl;
				49
				50	return dl_rq->rb_leftmost == &dl_se->rb_node;
				51	}
				52
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	53	void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
				54	{
				55	raw_spin_lock_init(&dl_b->dl_runtime_lock);
				56	dl_b->dl_period = period;
				57	dl_b->dl_runtime = runtime;
				58	}
				59
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	60	void init_dl_bw(struct dl_bw *dl_b)
				61	{
				62	raw_spin_lock_init(&dl_b->lock);
				63	raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	64	if (global_rt_runtime() == RUNTIME_INF)
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	65	dl_b->bw = -1;
				66	else
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	67	dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	68	raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
				69	dl_b->total_bw = 0;
				70	}
				71
Abel Vesa	07c54f7	2015-03-03 13:50:27 +0200	[diff] [blame]	72	void init_dl_rq(struct dl_rq *dl_rq)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	73	{
				74	dl_rq->rb_root = RB_ROOT;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	75
				76	#ifdef CONFIG_SMP
				77	/* zero means no -deadline tasks */
				78	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
				79
				80	dl_rq->dl_nr_migratory = 0;
				81	dl_rq->overloaded = 0;
				82	dl_rq->pushable_dl_tasks_root = RB_ROOT;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	83	#else
				84	init_dl_bw(&dl_rq->dl_bw);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	85	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	86	}
				87
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	88	#ifdef CONFIG_SMP
				89
				90	static inline int dl_overloaded(struct rq *rq)
				91	{
				92	return atomic_read(&rq->rd->dlo_count);
				93	}
				94
				95	static inline void dl_set_overload(struct rq *rq)
				96	{
				97	if (!rq->online)
				98	return;
				99
				100	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
				101	/*
				102	* Must be visible before the overload count is
				103	* set (as in sched_rt.c).
				104	*
				105	* Matched by the barrier in pull_dl_task().
				106	*/
				107	smp_wmb();
				108	atomic_inc(&rq->rd->dlo_count);
				109	}
				110
				111	static inline void dl_clear_overload(struct rq *rq)
				112	{
				113	if (!rq->online)
				114	return;
				115
				116	atomic_dec(&rq->rd->dlo_count);
				117	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
				118	}
				119
				120	static void update_dl_migration(struct dl_rq *dl_rq)
				121	{
Kirill Tkhai	995b9ea	2014-02-18 02:24:13 +0400	[diff] [blame]	122	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	123	if (!dl_rq->overloaded) {
				124	dl_set_overload(rq_of_dl_rq(dl_rq));
				125	dl_rq->overloaded = 1;
				126	}
				127	} else if (dl_rq->overloaded) {
				128	dl_clear_overload(rq_of_dl_rq(dl_rq));
				129	dl_rq->overloaded = 0;
				130	}
				131	}
				132
				133	static void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				134	{
				135	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	136
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	137	if (p->nr_cpus_allowed > 1)
				138	dl_rq->dl_nr_migratory++;
				139
				140	update_dl_migration(dl_rq);
				141	}
				142
				143	static void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				144	{
				145	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	146
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	147	if (p->nr_cpus_allowed > 1)
				148	dl_rq->dl_nr_migratory--;
				149
				150	update_dl_migration(dl_rq);
				151	}
				152
				153	/*
				154	* The list of pushable -deadline task is not a plist, like in
				155	* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
				156	*/
				157	static void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				158	{
				159	struct dl_rq *dl_rq = &rq->dl;
				160	struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
				161	struct rb_node *parent = NULL;
				162	struct task_struct *entry;
				163	int leftmost = 1;
				164
				165	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
				166
				167	while (*link) {
				168	parent = *link;
				169	entry = rb_entry(parent, struct task_struct,
				170	pushable_dl_tasks);
				171	if (dl_entity_preempt(&p->dl, &entry->dl))
				172	link = &parent->rb_left;
				173	else {
				174	link = &parent->rb_right;
				175	leftmost = 0;
				176	}
				177	}
				178
				179	if (leftmost)
				180	dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
				181
				182	rb_link_node(&p->pushable_dl_tasks, parent, link);
				183	rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				184	}
				185
				186	static void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				187	{
				188	struct dl_rq *dl_rq = &rq->dl;
				189
				190	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
				191	return;
				192
				193	if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {
				194	struct rb_node *next_node;
				195
				196	next_node = rb_next(&p->pushable_dl_tasks);
				197	dl_rq->pushable_dl_tasks_leftmost = next_node;
				198	}
				199
				200	rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				201	RB_CLEAR_NODE(&p->pushable_dl_tasks);
				202	}
				203
				204	static inline int has_pushable_dl_tasks(struct rq *rq)
				205	{
				206	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);
				207	}
				208
				209	static int push_dl_task(struct rq *rq);
				210
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	211	static inline bool need_pull_dl_task(struct rq rq, struct task_struct prev)
				212	{
				213	return dl_task(prev);
				214	}
				215
Peter Zijlstra	e3fca9e	2015-06-11 14:46:37 +0200	[diff] [blame^]	216	static DEFINE_PER_CPU(struct callback_head, dl_balance_head);
				217
				218	static void push_dl_tasks(struct rq *);
				219
				220	static inline void queue_push_tasks(struct rq *rq)
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	221	{
Peter Zijlstra	e3fca9e	2015-06-11 14:46:37 +0200	[diff] [blame^]	222	if (!has_pushable_dl_tasks(rq))
				223	return;
				224
				225	queue_balance_callback(rq, &per_cpu(dl_balance_head, rq->cpu), push_dl_tasks);
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	226	}
				227
Wanpeng Li	fa9c9d1	2015-03-27 07:08:35 +0800	[diff] [blame]	228	static struct rq find_lock_later_rq(struct task_struct task, struct rq *rq);
				229
				230	static void dl_task_offline_migration(struct rq rq, struct task_struct p)
				231	{
				232	struct rq *later_rq = NULL;
				233	bool fallback = false;
				234
				235	later_rq = find_lock_later_rq(p, rq);
				236
				237	if (!later_rq) {
				238	int cpu;
				239
				240	/*
				241	* If we cannot preempt any rq, fall back to pick any
				242	* online cpu.
				243	*/
				244	fallback = true;
				245	cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
				246	if (cpu >= nr_cpu_ids) {
				247	/*
				248	* Fail to find any suitable cpu.
				249	* The task will never come back!
				250	*/
				251	BUG_ON(dl_bandwidth_enabled());
				252
				253	/*
				254	* If admission control is disabled we
				255	* try a little harder to let the task
				256	* run.
				257	*/
				258	cpu = cpumask_any(cpu_active_mask);
				259	}
				260	later_rq = cpu_rq(cpu);
				261	double_lock_balance(rq, later_rq);
				262	}
				263
				264	deactivate_task(rq, p, 0);
				265	set_task_cpu(p, later_rq->cpu);
				266	activate_task(later_rq, p, ENQUEUE_REPLENISH);
				267
				268	if (!fallback)
				269	resched_curr(later_rq);
				270
				271	double_unlock_balance(rq, later_rq);
				272	}
				273
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	274	#else
				275
				276	static inline
				277	void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				278	{
				279	}
				280
				281	static inline
				282	void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				283	{
				284	}
				285
				286	static inline
				287	void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				288	{
				289	}
				290
				291	static inline
				292	void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				293	{
				294	}
				295
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	296	static inline bool need_pull_dl_task(struct rq rq, struct task_struct prev)
				297	{
				298	return false;
				299	}
				300
				301	static inline int pull_dl_task(struct rq *rq)
				302	{
				303	return 0;
				304	}
				305
/* !CONFIG_SMP stub: no balance callback is queued. */
static inline void queue_push_tasks(struct rq *rq) { }
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	309	#endif /* CONFIG_SMP */
				310
/* Forward declarations for helpers used before their definitions. */
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				  int flags);
				315
				316	/*
				317	* We are being explicitly informed that a new instance is starting,
				318	* and this means that:
				319	* - the absolute deadline of the entity has to be placed at
				320	* current time + relative deadline;
				321	* - the runtime of the entity has to be set to the maximum value.
				322	*
				323	* The capability of specifying such event is useful whenever a -deadline
				324	* entity wants to (try to!) synchronize its behaviour with the scheduler's
				325	* one, and to (try to!) reconcile itself with its own scheduling
				326	* parameters.
				327	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	328	static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
				329	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	330	{
				331	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				332	struct rq *rq = rq_of_dl_rq(dl_rq);
				333
				334	WARN_ON(!dl_se->dl_new \|\| dl_se->dl_throttled);
				335
				336	/*
				337	* We use the regular wall clock time to set deadlines in the
				338	* future; in fact, we must consider execution overheads (time
				339	* spent on hardirq context, etc.).
				340	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	341	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				342	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	343	dl_se->dl_new = 0;
				344	}
				345
				346	/*
				347	* Pure Earliest Deadline First (EDF) scheduling does not deal with the
				348	* possibility of a entity lasting more than what it declared, and thus
				349	* exhausting its runtime.
				350	*
				351	* Here we are interested in making runtime overrun possible, but we do
				352	* not want a entity which is misbehaving to affect the scheduling of all
				353	* other entities.
				354	* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
				355	* is used, in order to confine each entity within its own bandwidth.
				356	*
				357	* This function deals exactly with that, and ensures that when the runtime
				358	* of a entity is replenished, its deadline is also postponed. That ensures
				359	* the overrunning entity can't interfere with other entity in the system and
				360	* can't make them miss their deadlines. Reasons why this kind of overruns
				361	* could happen are, typically, a entity voluntarily trying to overcome its
xiaofeng.yan	1b09d29	2014-07-07 05:59:04 +0000	[diff] [blame]	362	* runtime, or it just underestimated it during sched_setattr().
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	363	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	364	static void replenish_dl_entity(struct sched_dl_entity *dl_se,
				365	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	366	{
				367	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				368	struct rq *rq = rq_of_dl_rq(dl_rq);
				369
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	370	BUG_ON(pi_se->dl_runtime <= 0);
				371
				372	/*
				373	* This could be the case for a !-dl task that is boosted.
				374	* Just go with full inherited parameters.
				375	*/
				376	if (dl_se->dl_deadline == 0) {
				377	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				378	dl_se->runtime = pi_se->dl_runtime;
				379	}
				380
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	381	/*
				382	* We keep moving the deadline away until we get some
				383	* available runtime for the entity. This ensures correct
				384	* handling of situations where the runtime overrun is
				385	* arbitrary large.
				386	*/
				387	while (dl_se->runtime <= 0) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	388	dl_se->deadline += pi_se->dl_period;
				389	dl_se->runtime += pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	390	}
				391
				392	/*
				393	* At this point, the deadline really should be "in
				394	* the future" with respect to rq->clock. If it's
				395	* not, we are, for some reason, lagging too much!
				396	* Anyway, after having warn userspace abut that,
				397	* we still try to keep the things running by
				398	* resetting the deadline and the budget of the
				399	* entity.
				400	*/
				401	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
John Stultz	c224815	2014-06-04 16:11:41 -0700	[diff] [blame]	402	printk_deferred_once("sched: DL replenish lagged to much\n");
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	403	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				404	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	405	}
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	406
				407	if (dl_se->dl_yielded)
				408	dl_se->dl_yielded = 0;
				409	if (dl_se->dl_throttled)
				410	dl_se->dl_throttled = 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	411	}
				412
				413	/*
				414	* Here we check if --at time t-- an entity (which is probably being
				415	* [re]activated or, in general, enqueued) can use its remaining runtime
				416	* and its current deadline _without_ exceeding the bandwidth it is
				417	* assigned (function returns true if it can't). We are in fact applying
				418	* one of the CBS rules: when a task wakes up, if the residual runtime
				419	* over residual deadline fits within the allocated bandwidth, then we
				420	* can keep the current (absolute) deadline and residual budget without
				421	* disrupting the schedulability of the system. Otherwise, we should
				422	* refill the runtime and set the deadline a period in the future,
				423	* because keeping the current (absolute) deadline of the task would
Dario Faggioli	712e5e3	2014-01-27 12:20:15 +0100	[diff] [blame]	424	* result in breaking guarantees promised to other tasks (refer to
				425	* Documentation/scheduler/sched-deadline.txt for more informations).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	426	*
				427	* This function returns true if:
				428	*
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	429	* runtime / (deadline - t) > dl_runtime / dl_period ,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	430	*
				431	* IOW we can't recycle current parameters.
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	432	*
				433	* Notice that the bandwidth check is done against the period. For
				434	* task with deadline equal to period this is the same of using
				435	* dl_deadline instead of dl_period in the equation above.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	436	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	437	static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
				438	struct sched_dl_entity *pi_se, u64 t)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	439	{
				440	u64 left, right;
				441
				442	/*
				443	* left and right are the two sides of the equation above,
				444	* after a bit of shuffling to use multiplications instead
				445	* of divisions.
				446	*
				447	* Note that none of the time values involved in the two
				448	* multiplications are absolute: dl_deadline and dl_runtime
				449	* are the relative deadline and the maximum runtime of each
				450	* instance, runtime is the runtime left for the last instance
				451	* and (deadline - t), since t is rq->clock, is the time left
				452	* to the (absolute) deadline. Even if overflowing the u64 type
				453	* is very unlikely to occur in both cases, here we scale down
				454	* as we want to avoid that risk at all. Scaling down by 10
				455	* means that we reduce granularity to 1us. We are fine with it,
				456	* since this is only a true/false check and, anyway, thinking
				457	* of anything below microseconds resolution is actually fiction
				458	* (but still we want to give the user that illusion >;).
				459	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	460	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
				461	right = ((dl_se->deadline - t) >> DL_SCALE) *
				462	(pi_se->dl_runtime >> DL_SCALE);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	463
				464	return dl_time_before(right, left);
				465	}
				466
				467	/*
				468	* When a -deadline entity is queued back on the runqueue, its runtime and
				469	* deadline might need updating.
				470	*
				471	* The policy here is that we update the deadline of the entity only if:
				472	* - the current deadline is in the past,
				473	* - using the remaining runtime with the current deadline would make
				474	* the entity exceed its bandwidth.
				475	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	476	static void update_dl_entity(struct sched_dl_entity *dl_se,
				477	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	478	{
				479	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				480	struct rq *rq = rq_of_dl_rq(dl_rq);
				481
				482	/*
				483	* The arrival of a new instance needs special treatment, i.e.,
				484	* the actual scheduling parameters have to be "renewed".
				485	*/
				486	if (dl_se->dl_new) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	487	setup_new_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	488	return;
				489	}
				490
				491	if (dl_time_before(dl_se->deadline, rq_clock(rq)) \|\|
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	492	dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
				493	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				494	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	495	}
				496	}
				497
				498	/*
				499	* If the entity depleted all its runtime, and if we want it to sleep
				500	* while waiting for some new execution time to become available, we
				501	* set the bandwidth enforcement timer to the replenishment instant
				502	* and try to activate it.
				503	*
				504	* Notice that it is important for the caller to know if the timer
				505	* actually started or not (i.e., the replenishment instant is in
				506	* the future or in the past).
				507	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	508	static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	509	{
				510	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				511	struct rq *rq = rq_of_dl_rq(dl_rq);
				512	ktime_t now, act;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	513	s64 delta;
				514
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	515	if (boosted)
				516	return 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	517	/*
				518	* We want the timer to fire at the deadline, but considering
				519	* that it is actually coming from rq->clock and not from
				520	* hrtimer's time base reading.
				521	*/
				522	act = ns_to_ktime(dl_se->deadline);
				523	now = hrtimer_cb_get_time(&dl_se->dl_timer);
				524	delta = ktime_to_ns(now) - rq_clock(rq);
				525	act = ktime_add_ns(act, delta);
				526
				527	/*
				528	* If the expiry time already passed, e.g., because the value
				529	* chosen as the deadline is too small, don't even try to
				530	* start the timer in the past!
				531	*/
				532	if (ktime_us_delta(act, now) < 0)
				533	return 0;
				534
Thomas Gleixner	cc9684d	2015-04-14 21:09:06 +0000	[diff] [blame]	535	hrtimer_start(&dl_se->dl_timer, act, HRTIMER_MODE_ABS);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	536
Thomas Gleixner	cc9684d	2015-04-14 21:09:06 +0000	[diff] [blame]	537	return 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	538	}
				539
				540	/*
				541	* This is the bandwidth enforcement timer callback. If here, we know
				542	* a task is not on its dl_rq, since the fact that the timer was running
				543	* means the task is throttled and needs a runtime replenishment.
				544	*
				545	* However, what we actually do depends on the fact the task is active,
				546	* (it is on its rq) or has been removed from there by a call to
				547	* dequeue_task_dl(). In the former case we must issue the runtime
				548	* replenishment and add the task back to the dl_rq; in the latter, we just
				549	* do nothing but clearing dl_throttled, so that runtime and deadline
				550	* updating (and the queueing back to dl_rq) will be done by the
				551	* next call to enqueue_task_dl().
				552	*/
				553	static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
				554	{
				555	struct sched_dl_entity *dl_se = container_of(timer,
				556	struct sched_dl_entity,
				557	dl_timer);
				558	struct task_struct *p = dl_task_of(dl_se);
Peter Zijlstra	3960c8c	2015-02-17 13:22:25 +0100	[diff] [blame]	559	unsigned long flags;
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	560	struct rq *rq;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	561
Juri Lelli	4cd57f9	2015-03-31 09:53:36 +0100	[diff] [blame]	562	rq = task_rq_lock(p, &flags);
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	563
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	564	/*
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	565	* We need to take care of several possible races here:
				566	*
				567	* - the task might have changed its scheduling policy
				568	* to something different than SCHED_DEADLINE
				569	* - the task might have changed its reservation parameters
				570	* (through sched_setattr())
				571	* - the task might have been boosted by someone else and
				572	* might be in the boosting/deboosting path
				573	*
				574	* In all this cases we bail out, as the task is already
				575	* in the runqueue or is going to be enqueued back anyway.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	576	*/
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	577	if (!dl_task(p) \|\| dl_se->dl_new \|\|
				578	dl_se->dl_boosted \|\| !dl_se->dl_throttled)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	579	goto unlock;
				580
				581	sched_clock_tick();
				582	update_rq_clock(rq);
Kirill Tkhai	a79ec89	2015-02-16 15:38:34 +0300	[diff] [blame]	583
Wanpeng Li	fa9c9d1	2015-03-27 07:08:35 +0800	[diff] [blame]	584	#ifdef CONFIG_SMP
				585	/*
				586	* If we find that the rq the task was on is no longer
				587	* available, we need to select a new rq.
				588	*/
				589	if (unlikely(!rq->online)) {
				590	dl_task_offline_migration(rq, p);
				591	goto unlock;
				592	}
				593	#endif
				594
Kirill Tkhai	a79ec89	2015-02-16 15:38:34 +0300	[diff] [blame]	595	/*
				596	* If the throttle happened during sched-out; like:
				597	*
				598	* schedule()
				599	* deactivate_task()
				600	* dequeue_task_dl()
				601	* update_curr_dl()
				602	* start_dl_timer()
				603	* __dequeue_task_dl()
				604	* prev->on_rq = 0;
				605	*
				606	* We can be both throttled and !queued. Replenish the counter
				607	* but do not enqueue -- wait for our wakeup to do that.
				608	*/
				609	if (!task_on_rq_queued(p)) {
				610	replenish_dl_entity(dl_se, dl_se);
				611	goto unlock;
				612	}
				613
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	614	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
				615	if (dl_task(rq->curr))
				616	check_preempt_curr_dl(rq, p, 0);
				617	else
				618	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	619	#ifdef CONFIG_SMP
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	620	/*
				621	* Queueing this task back might have overloaded rq,
				622	* check if we need to kick someone away.
				623	*/
				624	if (has_pushable_dl_tasks(rq))
				625	push_dl_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	626	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	627	unlock:
Juri Lelli	4cd57f9	2015-03-31 09:53:36 +0100	[diff] [blame]	628	task_rq_unlock(rq, p, &flags);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	629
				630	return HRTIMER_NORESTART;
				631	}
				632
				633	void init_dl_task_timer(struct sched_dl_entity *dl_se)
				634	{
				635	struct hrtimer *timer = &dl_se->dl_timer;
				636
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	637	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				638	timer->function = dl_task_timer;
				639	}
				640
				641	static
				642	int dl_runtime_exceeded(struct rq rq, struct sched_dl_entity dl_se)
				643	{
Luca Abeni	269ad80	2014-12-17 11:50:32 +0100	[diff] [blame]	644	return (dl_se->runtime <= 0);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	645	}
				646
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	647	extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
				648
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	649	/*
				650	* Update the current task's runtime statistics (provided it is still
				651	* a -deadline task and has not been removed from the dl_rq).
				652	*/
				653	static void update_curr_dl(struct rq *rq)
				654	{
				655	struct task_struct *curr = rq->curr;
				656	struct sched_dl_entity *dl_se = &curr->dl;
				657	u64 delta_exec;
				658
				659	if (!dl_task(curr) \|\| !on_dl_rq(dl_se))
				660	return;
				661
				662	/*
				663	* Consumed budget is computed considering the time as
				664	* observed by schedulable tasks (excluding time spent
				665	* in hardirq context, etc.). Deadlines are instead
				666	* computed using hard walltime. This seems to be the more
				667	* natural solution, but the full ramifications of this
				668	* approach need further study.
				669	*/
				670	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	671	if (unlikely((s64)delta_exec <= 0))
				672	return;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	673
				674	schedstat_set(curr->se.statistics.exec_max,
				675	max(curr->se.statistics.exec_max, delta_exec));
				676
				677	curr->se.sum_exec_runtime += delta_exec;
				678	account_group_exec_runtime(curr, delta_exec);
				679
				680	curr->se.exec_start = rq_clock_task(rq);
				681	cpuacct_charge(curr, delta_exec);
				682
Dario Faggioli	239be4a	2013-11-07 14:43:39 +0100	[diff] [blame]	683	sched_rt_avg_update(rq, delta_exec);
				684
Wanpeng Li	8049688	2014-10-31 06:39:32 +0800	[diff] [blame]	685	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	686	if (dl_runtime_exceeded(rq, dl_se)) {
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	687	dl_se->dl_throttled = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	688	__dequeue_task_dl(rq, curr, 0);
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	689	if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	690	enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
				691
				692	if (!is_leftmost(curr, &rq->dl))
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	693	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	694	}
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	695
				696	/*
				697	* Because -- for now -- we share the rt bandwidth, we need to
				698	* account our runtime there too, otherwise actual rt tasks
				699	* would be able to exceed the shared quota.
				700	*
				701	* Account to the root rt group for now.
				702	*
				703	* The solution we're working towards is having the RT groups scheduled
				704	* using deadline servers -- however there's a few nasties to figure
				705	* out before that can happen.
				706	*/
				707	if (rt_bandwidth_enabled()) {
				708	struct rt_rq *rt_rq = &rq->rt;
				709
				710	raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	711	/*
				712	* We'll let actual RT tasks worry about the overflow here, we
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	713	* have our own CBS to keep us inline; only account when RT
				714	* bandwidth is relevant.
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	715	*/
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	716	if (sched_rt_bandwidth_account(rt_rq))
				717	rt_rq->rt_time += delta_exec;
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	718	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				719	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	720	}
				721
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	722	#ifdef CONFIG_SMP
				723
				724	static struct task_struct pick_next_earliest_dl_task(struct rq rq, int cpu);
				725
				726	static inline u64 next_deadline(struct rq *rq)
				727	{
				728	struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
				729
				730	if (next && dl_prio(next->prio))
				731	return next->dl.deadline;
				732	else
				733	return 0;
				734	}
				735
				736	static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				737	{
				738	struct rq *rq = rq_of_dl_rq(dl_rq);
				739
				740	if (dl_rq->earliest_dl.curr == 0 \|\|
				741	dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
				742	/*
				743	* If the dl_rq had no -deadline tasks, or if the new task
				744	* has shorter deadline than the current one on dl_rq, we
				745	* know that the previous earliest becomes our next earliest,
				746	* as the new task becomes the earliest itself.
				747	*/
				748	dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
				749	dl_rq->earliest_dl.curr = deadline;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	750	cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	751	} else if (dl_rq->earliest_dl.next == 0 \|\|
				752	dl_time_before(deadline, dl_rq->earliest_dl.next)) {
				753	/*
				754	* On the other hand, if the new -deadline task has a
				755	* a later deadline than the earliest one on dl_rq, but
				756	* it is earlier than the next (if any), we must
				757	* recompute the next-earliest.
				758	*/
				759	dl_rq->earliest_dl.next = next_deadline(rq);
				760	}
				761	}
				762
				763	static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				764	{
				765	struct rq *rq = rq_of_dl_rq(dl_rq);
				766
				767	/*
				768	* Since we may have removed our earliest (and/or next earliest)
				769	* task we must recompute them.
				770	*/
				771	if (!dl_rq->dl_nr_running) {
				772	dl_rq->earliest_dl.curr = 0;
				773	dl_rq->earliest_dl.next = 0;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	774	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	775	} else {
				776	struct rb_node *leftmost = dl_rq->rb_leftmost;
				777	struct sched_dl_entity *entry;
				778
				779	entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
				780	dl_rq->earliest_dl.curr = entry->deadline;
				781	dl_rq->earliest_dl.next = next_deadline(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	782	cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	783	}
				784	}
				785
				786	#else
				787
				788	static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				789	static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				790
				791	#endif /* CONFIG_SMP */
				792
				793	static inline
				794	void inc_dl_tasks(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				795	{
				796	int prio = dl_task_of(dl_se)->prio;
				797	u64 deadline = dl_se->deadline;
				798
				799	WARN_ON(!dl_prio(prio));
				800	dl_rq->dl_nr_running++;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	801	add_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	802
				803	inc_dl_deadline(dl_rq, deadline);
				804	inc_dl_migration(dl_se, dl_rq);
				805	}
				806
				807	static inline
				808	void dec_dl_tasks(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				809	{
				810	int prio = dl_task_of(dl_se)->prio;
				811
				812	WARN_ON(!dl_prio(prio));
				813	WARN_ON(!dl_rq->dl_nr_running);
				814	dl_rq->dl_nr_running--;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	815	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	816
				817	dec_dl_deadline(dl_rq, dl_se->deadline);
				818	dec_dl_migration(dl_se, dl_rq);
				819	}
				820
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	821	static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
				822	{
				823	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				824	struct rb_node **link = &dl_rq->rb_root.rb_node;
				825	struct rb_node *parent = NULL;
				826	struct sched_dl_entity *entry;
				827	int leftmost = 1;
				828
				829	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
				830
				831	while (*link) {
				832	parent = *link;
				833	entry = rb_entry(parent, struct sched_dl_entity, rb_node);
				834	if (dl_time_before(dl_se->deadline, entry->deadline))
				835	link = &parent->rb_left;
				836	else {
				837	link = &parent->rb_right;
				838	leftmost = 0;
				839	}
				840	}
				841
				842	if (leftmost)
				843	dl_rq->rb_leftmost = &dl_se->rb_node;
				844
				845	rb_link_node(&dl_se->rb_node, parent, link);
				846	rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
				847
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	848	inc_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	849	}
				850
				851	static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
				852	{
				853	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				854
				855	if (RB_EMPTY_NODE(&dl_se->rb_node))
				856	return;
				857
				858	if (dl_rq->rb_leftmost == &dl_se->rb_node) {
				859	struct rb_node *next_node;
				860
				861	next_node = rb_next(&dl_se->rb_node);
				862	dl_rq->rb_leftmost = next_node;
				863	}
				864
				865	rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
				866	RB_CLEAR_NODE(&dl_se->rb_node);
				867
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	868	dec_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	869	}
				870
				871	static void
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	872	enqueue_dl_entity(struct sched_dl_entity *dl_se,
				873	struct sched_dl_entity *pi_se, int flags)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	874	{
				875	BUG_ON(on_dl_rq(dl_se));
				876
				877	/*
				878	* If this is a wakeup or a new instance, the scheduling
				879	* parameters of the task might need updating. Otherwise,
				880	* we want a replenishment of its runtime.
				881	*/
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	882	if (dl_se->dl_new \|\| flags & ENQUEUE_WAKEUP)
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	883	update_dl_entity(dl_se, pi_se);
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	884	else if (flags & ENQUEUE_REPLENISH)
				885	replenish_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	886
				887	__enqueue_dl_entity(dl_se);
				888	}
				889
				/* Counterpart of enqueue_dl_entity(); simply unlinks @dl_se from its tree. */
				static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
				{
					__dequeue_dl_entity(dl_se);
				}
				894
				895	static void enqueue_task_dl(struct rq rq, struct task_struct p, int flags)
				896	{
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	897	struct task_struct *pi_task = rt_mutex_get_top_task(p);
				898	struct sched_dl_entity *pi_se = &p->dl;
				899
				900	/*
				901	* Use the scheduling parameters of the top pi-waiter
				902	* task if we have one and its (relative) deadline is
				903	* smaller than our one... OTW we keep our runtime and
				904	* deadline.
				905	*/
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	906	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	907	pi_se = &pi_task->dl;
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	908	} else if (!dl_prio(p->normal_prio)) {
				909	/*
				910	* Special case in which we have a !SCHED_DEADLINE task
				911	* that is going to be deboosted, but exceedes its
				912	* runtime while doing so. No point in replenishing
				913	* it, as it's going to return back to its original
				914	* scheduling class after this.
				915	*/
				916	BUG_ON(!p->dl.dl_boosted \|\| flags != ENQUEUE_REPLENISH);
				917	return;
				918	}
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	919
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	920	/*
				921	* If p is throttled, we do nothing. In fact, if it exhausted
				922	* its budget it needs a replenishment and, since it now is on
				923	* its rq, the bandwidth timer callback (which clearly has not
				924	* run yet) will take care of this.
				925	*/
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	926	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	927	return;
				928
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	929	enqueue_dl_entity(&p->dl, pi_se, flags);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	930
				931	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
				932	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	933	}
				934
				935	static void __dequeue_task_dl(struct rq rq, struct task_struct p, int flags)
				936	{
				937	dequeue_dl_entity(&p->dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	938	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	939	}
				940
				/*
				 * dequeue_task_dl - bring the current task's accounting up to date via
				 * update_curr_dl(), then take @p off @rq.
				 */
				static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				{
					update_curr_dl(rq);
					__dequeue_task_dl(rq, p, flags);
				}
				946
				947	/*
				948	* Yield task semantic for -deadline tasks is:
				949	*
				950	* get off from the CPU until our next instance, with
				951	* a new runtime. This is of little use now, since we
				952	* don't have a bandwidth reclaiming mechanism. Anyway,
				953	* bandwidth reclaiming is planned for the future, and
				954	* yield_task_dl will indicate that some spare budget
				955	* is available for other task instances to use it.
				956	*/
				957	static void yield_task_dl(struct rq *rq)
				958	{
				959	struct task_struct *p = rq->curr;
				960
				961	/*
				962	* We make the task go to sleep until its current deadline by
				963	* forcing its runtime to zero. This way, update_curr_dl() stops
				964	* it and the bandwidth timer will wake it up and will give it
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	965	* new scheduling parameters (thanks to dl_yielded=1).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	966	*/
				967	if (p->dl.runtime > 0) {
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	968	rq->curr->dl.dl_yielded = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	969	p->dl.runtime = 0;
				970	}
Kirill Tkhai	6f1607f	2015-02-04 12:09:32 +0300	[diff] [blame]	971	update_rq_clock(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	972	update_curr_dl(rq);
Wanpeng Li	44fb085	2015-03-10 12:20:00 +0800	[diff] [blame]	973	/*
				974	* Tell update_rq_clock() that we've just updated,
				975	* so we don't do microscopic update in schedule()
				976	* and double the fastpath cost.
				977	*/
				978	rq_clock_skip_update(rq, true);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	979	}
				980
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	981	#ifdef CONFIG_SMP
				982
				983	static int find_later_rq(struct task_struct *task);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	984
				985	static int
				986	select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
				987	{
				988	struct task_struct *curr;
				989	struct rq *rq;
				990
Wanpeng Li	1d7e974	2014-10-14 10:22:39 +0800	[diff] [blame]	991	if (sd_flag != SD_BALANCE_WAKE)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	992	goto out;
				993
				994	rq = cpu_rq(cpu);
				995
				996	rcu_read_lock();
Jason Low	316c1608d	2015-04-28 13:00:20 -0700	[diff] [blame]	997	curr = READ_ONCE(rq->curr); /* unlocked access */
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	998
				999	/*
				1000	* If we are dealing with a -deadline task, we must
				1001	* decide where to wake it up.
				1002	* If it has a later deadline and the current task
				1003	* on this rq can't move (provided the waking task
				1004	* can!) we prefer to send it somewhere else. On the
				1005	* other hand, if it has a shorter deadline, we
				1006	* try to make it stay here, it might be important.
				1007	*/
				1008	if (unlikely(dl_task(curr)) &&
				1009	(curr->nr_cpus_allowed < 2 \|\|
				1010	!dl_entity_preempt(&p->dl, &curr->dl)) &&
				1011	(p->nr_cpus_allowed > 1)) {
				1012	int target = find_later_rq(p);
				1013
				1014	if (target != -1)
				1015	cpu = target;
				1016	}
				1017	rcu_read_unlock();
				1018
				1019	out:
				1020	return cpu;
				1021	}
				1022
				1023	static void check_preempt_equal_dl(struct rq rq, struct task_struct p)
				1024	{
				1025	/*
				1026	* Current can't be migrated, useless to reschedule,
				1027	* let's hope p can move out.
				1028	*/
				1029	if (rq->curr->nr_cpus_allowed == 1 \|\|
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1030	cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1031	return;
				1032
				1033	/*
				1034	* p is migratable, so let's not schedule it and
				1035	* see if it is pushed or pulled somewhere else.
				1036	*/
				1037	if (p->nr_cpus_allowed != 1 &&
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1038	cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1039	return;
				1040
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1041	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1042	}
				1043
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1044	static int pull_dl_task(struct rq *this_rq);
				1045
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1046	#endif /* CONFIG_SMP */
				1047
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1048	/*
				1049	* Only called when both the current and waking task are -deadline
				1050	* tasks.
				1051	*/
				1052	static void check_preempt_curr_dl(struct rq rq, struct task_struct p,
				1053	int flags)
				1054	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1055	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1056	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1057	return;
				1058	}
				1059
				1060	#ifdef CONFIG_SMP
				1061	/*
				1062	* In the unlikely case current and p have the same deadline
				1063	* let us try to decide what's the best thing to do...
				1064	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1065	if ((p->dl.deadline == rq->curr->dl.deadline) &&
				1066	!test_tsk_need_resched(rq->curr))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1067	check_preempt_equal_dl(rq, p);
				1068	#endif /* CONFIG_SMP */
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1069	}
				1070
				#ifdef CONFIG_SCHED_HRTICK
				/* Start the hrtick on @rq, passing @p's remaining runtime as the delay. */
				static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
				{
					hrtick_start(rq, p->dl.runtime);
				}
				#else /* !CONFIG_SCHED_HRTICK */
				/* No high-resolution tick support configured: nothing to start. */
				static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
				{
				}
				#endif
				1081
				1082	static struct sched_dl_entity pick_next_dl_entity(struct rq rq,
				1083	struct dl_rq *dl_rq)
				1084	{
				1085	struct rb_node *left = dl_rq->rb_leftmost;
				1086
				1087	if (!left)
				1088	return NULL;
				1089
				1090	return rb_entry(left, struct sched_dl_entity, rb_node);
				1091	}
				1092
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1093	struct task_struct pick_next_task_dl(struct rq rq, struct task_struct *prev)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1094	{
				1095	struct sched_dl_entity *dl_se;
				1096	struct task_struct *p;
				1097	struct dl_rq *dl_rq;
				1098
				1099	dl_rq = &rq->dl;
				1100
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1101	if (need_pull_dl_task(rq, prev)) {
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1102	pull_dl_task(rq);
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1103	/*
				1104	* pull_rt_task() can drop (and re-acquire) rq->lock; this
				1105	* means a stop task can slip in, in which case we need to
				1106	* re-start task selection.
				1107	*/
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1108	if (rq->stop && task_on_rq_queued(rq->stop))
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1109	return RETRY_TASK;
				1110	}
				1111
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	1112	/*
				1113	* When prev is DL, we may throttle it in put_prev_task().
				1114	* So, we update time before we check for dl_nr_running.
				1115	*/
				1116	if (prev->sched_class == &dl_sched_class)
				1117	update_curr_dl(rq);
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1118
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1119	if (unlikely(!dl_rq->dl_nr_running))
				1120	return NULL;
				1121
Peter Zijlstra	3f1d2a3	2014-02-12 10:49:30 +0100	[diff] [blame]	1122	put_prev_task(rq, prev);
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1123
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1124	dl_se = pick_next_dl_entity(rq, dl_rq);
				1125	BUG_ON(!dl_se);
				1126
				1127	p = dl_task_of(dl_se);
				1128	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1129
				1130	/* Running task will never be pushed. */
Juri Lelli	7136265	2014-01-14 12:03:51 +0100	[diff] [blame]	1131	dequeue_pushable_dl_task(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1132
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1133	if (hrtick_enabled(rq))
				1134	start_hrtick_dl(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1135
Peter Zijlstra	e3fca9e	2015-06-11 14:46:37 +0200	[diff] [blame^]	1136	queue_push_tasks(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1137
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1138	return p;
				1139	}
				1140
				1141	static void put_prev_task_dl(struct rq rq, struct task_struct p)
				1142	{
				1143	update_curr_dl(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1144
				1145	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
				1146	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1147	}
				1148
				1149	static void task_tick_dl(struct rq rq, struct task_struct p, int queued)
				1150	{
				1151	update_curr_dl(rq);
				1152
Wanpeng Li	a7bebf4	2014-11-26 08:44:01 +0800	[diff] [blame]	1153	/*
				1154	* Even when we have runtime, update_curr_dl() might have resulted in us
				1155	* not being the leftmost task anymore. In that case NEED_RESCHED will
				1156	* be set and schedule() will start a new hrtick for the next task.
				1157	*/
				1158	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
				1159	is_leftmost(p, &rq->dl))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1160	start_hrtick_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1161	}
				1162
				/*
				 * task_fork_dl - fork hook for the -deadline class; intentionally empty.
				 *
				 * SCHED_DEADLINE tasks cannot fork; that restriction is enforced in
				 * sched_fork(), so there is nothing to do here.
				 */
				static void task_fork_dl(struct task_struct *p)
				{
				}
				1170
				1171	static void task_dead_dl(struct task_struct *p)
				1172	{
				1173	struct hrtimer *timer = &p->dl.dl_timer;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1174	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
				1175
				1176	/*
				1177	* Since we are TASK_DEAD we won't slip out of the domain!
				1178	*/
				1179	raw_spin_lock_irq(&dl_b->lock);
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1180	/* XXX we should retain the bw until 0-lag */
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1181	dl_b->total_bw -= p->dl.dl_bw;
				1182	raw_spin_unlock_irq(&dl_b->lock);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1183
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	1184	hrtimer_cancel(timer);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1185	}
				1186
				1187	static void set_curr_task_dl(struct rq *rq)
				1188	{
				1189	struct task_struct *p = rq->curr;
				1190
				1191	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1192
				1193	/* You can't push away the running task */
				1194	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1195	}
				1196
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1197	#ifdef CONFIG_SMP
				1198
				1199	/* Only try algorithms three times */
				1200	#define DL_MAX_TRIES 3
				1201
				/*
				 * pick_dl_task - can @p be moved from @rq to @cpu?
				 *
				 * Returns 1 when @p is not currently running on @rq and @cpu is in its
				 * allowed mask, 0 otherwise.
				 */
				static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
				{
					if (!task_running(rq, p) &&
					    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
						return 1;

					return 0;
				}
				1209
				1210	/* Returns the second earliest -deadline task, NULL otherwise */
				1211	static struct task_struct pick_next_earliest_dl_task(struct rq rq, int cpu)
				1212	{
				1213	struct rb_node *next_node = rq->dl.rb_leftmost;
				1214	struct sched_dl_entity *dl_se;
				1215	struct task_struct *p = NULL;
				1216
				1217	next_node:
				1218	next_node = rb_next(next_node);
				1219	if (next_node) {
				1220	dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
				1221	p = dl_task_of(dl_se);
				1222
				1223	if (pick_dl_task(rq, p, cpu))
				1224	return p;
				1225
				1226	goto next_node;
				1227	}
				1228
				1229	return NULL;
				1230	}
				1231
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1232	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
				1233
				1234	static int find_later_rq(struct task_struct *task)
				1235	{
				1236	struct sched_domain *sd;
Christoph Lameter	4ba2968	2014-08-26 19:12:21 -0500	[diff] [blame]	1237	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1238	int this_cpu = smp_processor_id();
				1239	int best_cpu, cpu = task_cpu(task);
				1240
				1241	/* Make sure the mask is initialized first */
				1242	if (unlikely(!later_mask))
				1243	return -1;
				1244
				1245	if (task->nr_cpus_allowed == 1)
				1246	return -1;
				1247
Juri Lelli	91ec677	2014-09-19 10:22:41 +0100	[diff] [blame]	1248	/*
				1249	* We have to consider system topology and task affinity
				1250	* first, then we can look for a suitable cpu.
				1251	*/
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1252	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
				1253	task, later_mask);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1254	if (best_cpu == -1)
				1255	return -1;
				1256
				1257	/*
				1258	* If we are here, some target has been found,
				1259	* the most suitable of which is cached in best_cpu.
				1260	* This is, among the runqueues where the current tasks
				1261	* have later deadlines than the task's one, the rq
				1262	* with the latest possible one.
				1263	*
				1264	* Now we check how well this matches with task's
				1265	* affinity and system topology.
				1266	*
				1267	* The last cpu where the task run is our first
				1268	* guess, since it is most likely cache-hot there.
				1269	*/
				1270	if (cpumask_test_cpu(cpu, later_mask))
				1271	return cpu;
				1272	/*
				1273	* Check if this_cpu is to be skipped (i.e., it is
				1274	* not in the mask) or not.
				1275	*/
				1276	if (!cpumask_test_cpu(this_cpu, later_mask))
				1277	this_cpu = -1;
				1278
				1279	rcu_read_lock();
				1280	for_each_domain(cpu, sd) {
				1281	if (sd->flags & SD_WAKE_AFFINE) {
				1282
				1283	/*
				1284	* If possible, preempting this_cpu is
				1285	* cheaper than migrating.
				1286	*/
				1287	if (this_cpu != -1 &&
				1288	cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				1289	rcu_read_unlock();
				1290	return this_cpu;
				1291	}
				1292
				1293	/*
				1294	* Last chance: if best_cpu is valid and is
				1295	* in the mask, that becomes our choice.
				1296	*/
				1297	if (best_cpu < nr_cpu_ids &&
				1298	cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
				1299	rcu_read_unlock();
				1300	return best_cpu;
				1301	}
				1302	}
				1303	}
				1304	rcu_read_unlock();
				1305
				1306	/*
				1307	* At this point, all our guesses failed, we just return
				1308	* 'something', and let the caller sort the things out.
				1309	*/
				1310	if (this_cpu != -1)
				1311	return this_cpu;
				1312
				1313	cpu = cpumask_any(later_mask);
				1314	if (cpu < nr_cpu_ids)
				1315	return cpu;
				1316
				1317	return -1;
				1318	}
				1319
				1320	/* Locks the rq it finds */
				1321	static struct rq find_lock_later_rq(struct task_struct task, struct rq *rq)
				1322	{
				1323	struct rq *later_rq = NULL;
				1324	int tries;
				1325	int cpu;
				1326
				1327	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
				1328	cpu = find_later_rq(task);
				1329
				1330	if ((cpu == -1) \|\| (cpu == rq->cpu))
				1331	break;
				1332
				1333	later_rq = cpu_rq(cpu);
				1334
				1335	/* Retry if something changed. */
				1336	if (double_lock_balance(rq, later_rq)) {
				1337	if (unlikely(task_rq(task) != rq \|\|
				1338	!cpumask_test_cpu(later_rq->cpu,
				1339	&task->cpus_allowed) \|\|
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1340	task_running(rq, task) \|\|
				1341	!task_on_rq_queued(task))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1342	double_unlock_balance(rq, later_rq);
				1343	later_rq = NULL;
				1344	break;
				1345	}
				1346	}
				1347
				1348	/*
				1349	* If the rq we found has no -deadline task, or
				1350	* its earliest one has a later deadline than our
				1351	* task, the rq is a good one.
				1352	*/
				1353	if (!later_rq->dl.dl_nr_running \|\|
				1354	dl_time_before(task->dl.deadline,
				1355	later_rq->dl.earliest_dl.curr))
				1356	break;
				1357
				1358	/* Otherwise we try again. */
				1359	double_unlock_balance(rq, later_rq);
				1360	later_rq = NULL;
				1361	}
				1362
				1363	return later_rq;
				1364	}
				1365
				1366	static struct task_struct pick_next_pushable_dl_task(struct rq rq)
				1367	{
				1368	struct task_struct *p;
				1369
				1370	if (!has_pushable_dl_tasks(rq))
				1371	return NULL;
				1372
				1373	p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
				1374	struct task_struct, pushable_dl_tasks);
				1375
				1376	BUG_ON(rq->cpu != task_cpu(p));
				1377	BUG_ON(task_current(rq, p));
				1378	BUG_ON(p->nr_cpus_allowed <= 1);
				1379
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1380	BUG_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1381	BUG_ON(!dl_task(p));
				1382
				1383	return p;
				1384	}
				1385
				1386	/*
				1387	* See if the non running -deadline tasks on this rq
				1388	* can be sent to some other CPU where they can preempt
				1389	* and start executing.
				1390	*/
				1391	static int push_dl_task(struct rq *rq)
				1392	{
				1393	struct task_struct *next_task;
				1394	struct rq *later_rq;
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1395	int ret = 0;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1396
				1397	if (!rq->dl.overloaded)
				1398	return 0;
				1399
				1400	next_task = pick_next_pushable_dl_task(rq);
				1401	if (!next_task)
				1402	return 0;
				1403
				1404	retry:
				1405	if (unlikely(next_task == rq->curr)) {
				1406	WARN_ON(1);
				1407	return 0;
				1408	}
				1409
				1410	/*
				1411	* If next_task preempts rq->curr, and rq->curr
				1412	* can move away, it makes sense to just reschedule
				1413	* without going further in pushing next_task.
				1414	*/
				1415	if (dl_task(rq->curr) &&
				1416	dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
				1417	rq->curr->nr_cpus_allowed > 1) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1418	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1419	return 0;
				1420	}
				1421
				1422	/* We might release rq lock */
				1423	get_task_struct(next_task);
				1424
				1425	/* Will lock the rq it'll find */
				1426	later_rq = find_lock_later_rq(next_task, rq);
				1427	if (!later_rq) {
				1428	struct task_struct *task;
				1429
				1430	/*
				1431	* We must check all this again, since
				1432	* find_lock_later_rq releases rq->lock and it is
				1433	* then possible that next_task has migrated.
				1434	*/
				1435	task = pick_next_pushable_dl_task(rq);
				1436	if (task_cpu(next_task) == rq->cpu && task == next_task) {
				1437	/*
				1438	* The task is still there. We don't try
				1439	* again, some other cpu will pull it when ready.
				1440	*/
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1441	goto out;
				1442	}
				1443
				1444	if (!task)
				1445	/* No more tasks */
				1446	goto out;
				1447
				1448	put_task_struct(next_task);
				1449	next_task = task;
				1450	goto retry;
				1451	}
				1452
				1453	deactivate_task(rq, next_task, 0);
				1454	set_task_cpu(next_task, later_rq->cpu);
				1455	activate_task(later_rq, next_task, 0);
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1456	ret = 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1457
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1458	resched_curr(later_rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1459
				1460	double_unlock_balance(rq, later_rq);
				1461
				1462	out:
				1463	put_task_struct(next_task);
				1464
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1465	return ret;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1466	}
				1467
				/* Keep pushing until push_dl_task() reports that nothing was moved. */
				static void push_dl_tasks(struct rq *rq)
				{
					/* Terminates as it moves a -deadline task */
					for (;;) {
						if (!push_dl_task(rq))
							break;
					}
				}
				1474
				1475	static int pull_dl_task(struct rq *this_rq)
				1476	{
				1477	int this_cpu = this_rq->cpu, ret = 0, cpu;
				1478	struct task_struct *p;
				1479	struct rq *src_rq;
				1480	u64 dmin = LONG_MAX;
				1481
				1482	if (likely(!dl_overloaded(this_rq)))
				1483	return 0;
				1484
				1485	/*
				1486	* Match the barrier from dl_set_overloaded; this guarantees that if we
				1487	* see overloaded we must also see the dlo_mask bit.
				1488	*/
				1489	smp_rmb();
				1490
				1491	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
				1492	if (this_cpu == cpu)
				1493	continue;
				1494
				1495	src_rq = cpu_rq(cpu);
				1496
				1497	/*
				1498	* It looks racy, abd it is! However, as in sched_rt.c,
				1499	* we are fine with this.
				1500	*/
				1501	if (this_rq->dl.dl_nr_running &&
				1502	dl_time_before(this_rq->dl.earliest_dl.curr,
				1503	src_rq->dl.earliest_dl.next))
				1504	continue;
				1505
				1506	/* Might drop this_rq->lock */
				1507	double_lock_balance(this_rq, src_rq);
				1508
				1509	/*
				1510	* If there are no more pullable tasks on the
				1511	* rq, we're done with it.
				1512	*/
				1513	if (src_rq->dl.dl_nr_running <= 1)
				1514	goto skip;
				1515
				1516	p = pick_next_earliest_dl_task(src_rq, this_cpu);
				1517
				1518	/*
				1519	* We found a task to be pulled if:
				1520	* - it preempts our current (if there's one),
				1521	* - it will preempt the last one we pulled (if any).
				1522	*/
				1523	if (p && dl_time_before(p->dl.deadline, dmin) &&
				1524	(!this_rq->dl.dl_nr_running \|\|
				1525	dl_time_before(p->dl.deadline,
				1526	this_rq->dl.earliest_dl.curr))) {
				1527	WARN_ON(p == src_rq->curr);
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1528	WARN_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1529
				1530	/*
				1531	* Then we pull iff p has actually an earlier
				1532	* deadline than the current task of its runqueue.
				1533	*/
				1534	if (dl_time_before(p->dl.deadline,
				1535	src_rq->curr->dl.deadline))
				1536	goto skip;
				1537
				1538	ret = 1;
				1539
				1540	deactivate_task(src_rq, p, 0);
				1541	set_task_cpu(p, this_cpu);
				1542	activate_task(this_rq, p, 0);
				1543	dmin = p->dl.deadline;
				1544
				1545	/* Is there any other task even earlier? */
				1546	}
				1547	skip:
				1548	double_unlock_balance(this_rq, src_rq);
				1549	}
				1550
				1551	return ret;
				1552	}
				1553
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1554	/*
				1555	* Since the task is not running and a reschedule is not going to happen
				1556	* anytime soon on its runqueue, we try pushing it away now.
				1557	*/
				1558	static void task_woken_dl(struct rq rq, struct task_struct p)
				1559	{
				1560	if (!task_running(rq, p) &&
				1561	!test_tsk_need_resched(rq->curr) &&
				1562	has_pushable_dl_tasks(rq) &&
				1563	p->nr_cpus_allowed > 1 &&
				1564	dl_task(rq->curr) &&
				1565	(rq->curr->nr_cpus_allowed < 2 \|\|
Wanpeng Li	6b0a563	2014-10-31 06:39:34 +0800	[diff] [blame]	1566	!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1567	push_dl_tasks(rq);
				1568	}
				1569	}
				1570
				1571	static void set_cpus_allowed_dl(struct task_struct *p,
				1572	const struct cpumask *new_mask)
				1573	{
				1574	struct rq *rq;
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1575	struct root_domain *src_rd;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1576	int weight;
				1577
				1578	BUG_ON(!dl_task(p));
				1579
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1580	rq = task_rq(p);
				1581	src_rd = rq->rd;
				1582	/*
				1583	* Migrating a SCHED_DEADLINE task between exclusive
				1584	* cpusets (different root_domains) entails a bandwidth
				1585	* update. We already made space for us in the destination
				1586	* domain (see cpuset_can_attach()).
				1587	*/
				1588	if (!cpumask_intersects(src_rd->span, new_mask)) {
				1589	struct dl_bw *src_dl_b;
				1590
				1591	src_dl_b = dl_bw_of(cpu_of(rq));
				1592	/*
				1593	* We now free resources of the root_domain we are migrating
				1594	* off. In the worst case, sched_setattr() may temporary fail
				1595	* until we complete the update.
				1596	*/
				1597	raw_spin_lock(&src_dl_b->lock);
				1598	__dl_clear(src_dl_b, p->dl.dl_bw);
				1599	raw_spin_unlock(&src_dl_b->lock);
				1600	}
				1601
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1602	/*
				1603	* Update only if the task is actually running (i.e.,
				1604	* it is on the rq AND it is not throttled).
				1605	*/
				1606	if (!on_dl_rq(&p->dl))
				1607	return;
				1608
				1609	weight = cpumask_weight(new_mask);
				1610
				1611	/*
				1612	* Only update if the process changes its state from whether it
				1613	* can migrate or not.
				1614	*/
				1615	if ((p->nr_cpus_allowed > 1) == (weight > 1))
				1616	return;
				1617
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1618	/*
				1619	* The process used to be able to migrate OR it can now migrate
				1620	*/
				1621	if (weight <= 1) {
				1622	if (!task_current(rq, p))
				1623	dequeue_pushable_dl_task(rq, p);
				1624	BUG_ON(!rq->dl.dl_nr_migratory);
				1625	rq->dl.dl_nr_migratory--;
				1626	} else {
				1627	if (!task_current(rq, p))
				1628	enqueue_pushable_dl_task(rq, p);
				1629	rq->dl.dl_nr_migratory++;
				1630	}
				1631
				1632	update_dl_migration(&rq->dl);
				1633	}
				1634
				1635	/* Assumes rq->lock is held */
				1636	static void rq_online_dl(struct rq *rq)
				1637	{
				1638	if (rq->dl.overloaded)
				1639	dl_set_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1640
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1641	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1642	if (rq->dl.dl_nr_running > 0)
				1643	cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1644	}
				1645
				1646	/* Assumes rq->lock is held */
				1647	static void rq_offline_dl(struct rq *rq)
				1648	{
				1649	if (rq->dl.overloaded)
				1650	dl_clear_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1651
				1652	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1653	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1654	}
				1655
				1656	void init_sched_dl_class(void)
				1657	{
				1658	unsigned int i;
				1659
				1660	for_each_possible_cpu(i)
				1661	zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
				1662	GFP_KERNEL, cpu_to_node(i));
				1663	}
				1664
				1665	#endif /* CONFIG_SMP */
				1666
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1667	/*
				1668	* Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
				1669	*/
				1670	static void cancel_dl_timer(struct rq rq, struct task_struct p)
				1671	{
				1672	struct hrtimer *dl_timer = &p->dl.dl_timer;
				1673
				1674	/* Nobody will change task's class if pi_lock is held */
				1675	lockdep_assert_held(&p->pi_lock);
				1676
				1677	if (hrtimer_active(dl_timer)) {
				1678	int ret = hrtimer_try_to_cancel(dl_timer);
				1679
				1680	if (unlikely(ret == -1)) {
				1681	/*
				1682	* Note, p may migrate OR new deadline tasks
				1683	* may appear in rq when we are unlocking it.
				1684	* A caller of us must be fine with that.
				1685	*/
				1686	raw_spin_unlock(&rq->lock);
				1687	hrtimer_cancel(dl_timer);
				1688	raw_spin_lock(&rq->lock);
				1689	}
				1690	}
				1691	}
				1692
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1693	static void switched_from_dl(struct rq rq, struct task_struct p)
				1694	{
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1695	/* XXX we should retain the bw until 0-lag */
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1696	cancel_dl_timer(rq, p);
Juri Lelli	a5e7be3	2014-09-19 10:22:39 +0100	[diff] [blame]	1697	__dl_clear_params(p);
				1698
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1699	/*
				1700	* Since this might be the only -deadline task on the rq,
				1701	* this is the right place to try to pull some other one
				1702	* from an overloaded cpu, if any.
				1703	*/
Wanpeng Li	cd66091	2014-10-31 06:39:35 +0800	[diff] [blame]	1704	if (!task_on_rq_queued(p) \|\| rq->dl.dl_nr_running)
				1705	return;
				1706
				1707	if (pull_dl_task(rq))
				1708	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1709	}
				1710
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1711	/*
				1712	* When switching to -deadline, we may overload the rq, then
				1713	* we try to push someone off, if possible.
				1714	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1715	static void switched_to_dl(struct rq rq, struct task_struct p)
				1716	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1717	int check_resched = 1;
				1718
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1719	if (task_on_rq_queued(p) && rq->curr != p) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1720	#ifdef CONFIG_SMP
Wanpeng Li	d9aade7a	2014-10-22 08:36:43 +0800	[diff] [blame]	1721	if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
				1722	push_dl_task(rq) && rq != task_rq(p))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1723	/* Only reschedule if pushing failed */
				1724	check_resched = 0;
				1725	#endif /* CONFIG_SMP */
Kirill Tkhai	f3a7e1a	2014-10-21 20:35:56 +0400	[diff] [blame]	1726	if (check_resched) {
				1727	if (dl_task(rq->curr))
				1728	check_preempt_curr_dl(rq, p, 0);
				1729	else
				1730	resched_curr(rq);
				1731	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1732	}
				1733	}
				1734
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1735	/*
				1736	* If the scheduling parameters of a -deadline task changed,
				1737	* a push or pull operation might be needed.
				1738	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1739	static void prio_changed_dl(struct rq rq, struct task_struct p,
				1740	int oldprio)
				1741	{
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1742	if (task_on_rq_queued(p) \|\| rq->curr == p) {
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1743	#ifdef CONFIG_SMP
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1744	/*
				1745	* This might be too much, but unfortunately
				1746	* we don't have the old deadline value, and
				1747	* we can't argue if the task is increasing
				1748	* or lowering its prio, so...
				1749	*/
				1750	if (!rq->dl.overloaded)
				1751	pull_dl_task(rq);
				1752
				1753	/*
				1754	* If we now have a earlier deadline task than p,
				1755	* then reschedule, provided p is still on this
				1756	* runqueue.
				1757	*/
				1758	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
				1759	rq->curr == p)
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1760	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1761	#else
				1762	/*
				1763	* Again, we don't know if p has a earlier
				1764	* or later deadline, so let's blindly set a
				1765	* (maybe not needed) rescheduling point.
				1766	*/
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1767	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1768	#endif /* CONFIG_SMP */
				1769	} else
				1770	switched_to_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1771	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1772
				1773	const struct sched_class dl_sched_class = {
				1774	.next = &rt_sched_class,
				1775	.enqueue_task = enqueue_task_dl,
				1776	.dequeue_task = dequeue_task_dl,
				1777	.yield_task = yield_task_dl,
				1778
				1779	.check_preempt_curr = check_preempt_curr_dl,
				1780
				1781	.pick_next_task = pick_next_task_dl,
				1782	.put_prev_task = put_prev_task_dl,
				1783
				1784	#ifdef CONFIG_SMP
				1785	.select_task_rq = select_task_rq_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1786	.set_cpus_allowed = set_cpus_allowed_dl,
				1787	.rq_online = rq_online_dl,
				1788	.rq_offline = rq_offline_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1789	.task_woken = task_woken_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1790	#endif
				1791
				1792	.set_curr_task = set_curr_task_dl,
				1793	.task_tick = task_tick_dl,
				1794	.task_fork = task_fork_dl,
				1795	.task_dead = task_dead_dl,
				1796
				1797	.prio_changed = prio_changed_dl,
				1798	.switched_from = switched_from_dl,
				1799	.switched_to = switched_to_dl,
Stanislaw Gruszka	6e99891	2014-11-12 16:58:44 +0100	[diff] [blame]	1800
				1801	.update_curr = update_curr_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1802	};
Wanpeng Li	acb3213	2014-10-31 06:39:33 +0800	[diff] [blame]	1803
				1804	#ifdef CONFIG_SCHED_DEBUG
				1805	extern void print_dl_rq(struct seq_file m, int cpu, struct dl_rq dl_rq);
				1806
				1807	void print_dl_stats(struct seq_file *m, int cpu)
				1808	{
				1809	print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
				1810	}
				1811	#endif /* CONFIG_SCHED_DEBUG */