Blame - kernel/sched/deadline.c - github.com/raspberrypi/raspberrypi-kernel

blob: 5e2f99bd5ce064254082c7e5c640385d5bbb8489 [file] [log] [blame]

Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1	/*
				2	* Deadline Scheduling Class (SCHED_DEADLINE)
				3	*
				4	* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
				5	*
				6	* Tasks that periodically execute their instances for less than their
				7	* runtime won't miss any of their deadlines.
				8	* Tasks that are not periodic or sporadic or that try to execute more
				9	* than their reserved bandwidth will be slowed down (and may potentially
				10	* miss some of their deadlines), and won't affect any other task.
				11	*
				12	* Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	13	* Juri Lelli <juri.lelli@gmail.com>,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	14	* Michael Trimarchi <michael@amarulasolutions.com>,
				15	* Fabio Checconi <fchecconi@gmail.com>
				16	*/
				17	#include "sched.h"
				18
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	19	#include <linux/slab.h>
				20
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	21	struct dl_bandwidth def_dl_bandwidth;
				22
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	23	static inline struct task_struct dl_task_of(struct sched_dl_entity dl_se)
				24	{
				25	return container_of(dl_se, struct task_struct, dl);
				26	}
				27
				28	static inline struct rq rq_of_dl_rq(struct dl_rq dl_rq)
				29	{
				30	return container_of(dl_rq, struct rq, dl);
				31	}
				32
				33	static inline struct dl_rq dl_rq_of_se(struct sched_dl_entity dl_se)
				34	{
				35	struct task_struct *p = dl_task_of(dl_se);
				36	struct rq *rq = task_rq(p);
				37
				38	return &rq->dl;
				39	}
				40
				41	static inline int on_dl_rq(struct sched_dl_entity *dl_se)
				42	{
				43	return !RB_EMPTY_NODE(&dl_se->rb_node);
				44	}
				45
				46	static inline int is_leftmost(struct task_struct p, struct dl_rq dl_rq)
				47	{
				48	struct sched_dl_entity *dl_se = &p->dl;
				49
				50	return dl_rq->rb_leftmost == &dl_se->rb_node;
				51	}
				52
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	53	void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
				54	{
				55	raw_spin_lock_init(&dl_b->dl_runtime_lock);
				56	dl_b->dl_period = period;
				57	dl_b->dl_runtime = runtime;
				58	}
				59
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	60	void init_dl_bw(struct dl_bw *dl_b)
				61	{
				62	raw_spin_lock_init(&dl_b->lock);
				63	raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	64	if (global_rt_runtime() == RUNTIME_INF)
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	65	dl_b->bw = -1;
				66	else
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	67	dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	68	raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
				69	dl_b->total_bw = 0;
				70	}
				71
Abel Vesa	07c54f7	2015-03-03 13:50:27 +0200	[diff] [blame^]	72	void init_dl_rq(struct dl_rq *dl_rq)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	73	{
				74	dl_rq->rb_root = RB_ROOT;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	75
				76	#ifdef CONFIG_SMP
				77	/* zero means no -deadline tasks */
				78	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
				79
				80	dl_rq->dl_nr_migratory = 0;
				81	dl_rq->overloaded = 0;
				82	dl_rq->pushable_dl_tasks_root = RB_ROOT;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	83	#else
				84	init_dl_bw(&dl_rq->dl_bw);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	85	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	86	}
				87
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	88	#ifdef CONFIG_SMP
				89
				90	static inline int dl_overloaded(struct rq *rq)
				91	{
				92	return atomic_read(&rq->rd->dlo_count);
				93	}
				94
				95	static inline void dl_set_overload(struct rq *rq)
				96	{
				97	if (!rq->online)
				98	return;
				99
				100	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
				101	/*
				102	* Must be visible before the overload count is
				103	* set (as in sched_rt.c).
				104	*
				105	* Matched by the barrier in pull_dl_task().
				106	*/
				107	smp_wmb();
				108	atomic_inc(&rq->rd->dlo_count);
				109	}
				110
				111	static inline void dl_clear_overload(struct rq *rq)
				112	{
				113	if (!rq->online)
				114	return;
				115
				116	atomic_dec(&rq->rd->dlo_count);
				117	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
				118	}
				119
				120	static void update_dl_migration(struct dl_rq *dl_rq)
				121	{
Kirill Tkhai	995b9ea	2014-02-18 02:24:13 +0400	[diff] [blame]	122	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	123	if (!dl_rq->overloaded) {
				124	dl_set_overload(rq_of_dl_rq(dl_rq));
				125	dl_rq->overloaded = 1;
				126	}
				127	} else if (dl_rq->overloaded) {
				128	dl_clear_overload(rq_of_dl_rq(dl_rq));
				129	dl_rq->overloaded = 0;
				130	}
				131	}
				132
				133	static void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				134	{
				135	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	136
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	137	if (p->nr_cpus_allowed > 1)
				138	dl_rq->dl_nr_migratory++;
				139
				140	update_dl_migration(dl_rq);
				141	}
				142
				143	static void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				144	{
				145	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	146
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	147	if (p->nr_cpus_allowed > 1)
				148	dl_rq->dl_nr_migratory--;
				149
				150	update_dl_migration(dl_rq);
				151	}
				152
				153	/*
				154	* The list of pushable -deadline task is not a plist, like in
				155	* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
				156	*/
				157	static void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				158	{
				159	struct dl_rq *dl_rq = &rq->dl;
				160	struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
				161	struct rb_node *parent = NULL;
				162	struct task_struct *entry;
				163	int leftmost = 1;
				164
				165	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
				166
				167	while (*link) {
				168	parent = *link;
				169	entry = rb_entry(parent, struct task_struct,
				170	pushable_dl_tasks);
				171	if (dl_entity_preempt(&p->dl, &entry->dl))
				172	link = &parent->rb_left;
				173	else {
				174	link = &parent->rb_right;
				175	leftmost = 0;
				176	}
				177	}
				178
				179	if (leftmost)
				180	dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
				181
				182	rb_link_node(&p->pushable_dl_tasks, parent, link);
				183	rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				184	}
				185
				186	static void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				187	{
				188	struct dl_rq *dl_rq = &rq->dl;
				189
				190	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
				191	return;
				192
				193	if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {
				194	struct rb_node *next_node;
				195
				196	next_node = rb_next(&p->pushable_dl_tasks);
				197	dl_rq->pushable_dl_tasks_leftmost = next_node;
				198	}
				199
				200	rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				201	RB_CLEAR_NODE(&p->pushable_dl_tasks);
				202	}
				203
				204	static inline int has_pushable_dl_tasks(struct rq *rq)
				205	{
				206	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);
				207	}
				208
				209	static int push_dl_task(struct rq *rq);
				210
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	211	static inline bool need_pull_dl_task(struct rq rq, struct task_struct prev)
				212	{
				213	return dl_task(prev);
				214	}
				215
				216	static inline void set_post_schedule(struct rq *rq)
				217	{
				218	rq->post_schedule = has_pushable_dl_tasks(rq);
				219	}
				220
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	221	#else
				222
				223	static inline
				224	void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				225	{
				226	}
				227
				228	static inline
				229	void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				230	{
				231	}
				232
				233	static inline
				234	void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				235	{
				236	}
				237
				238	static inline
				239	void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				240	{
				241	}
				242
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	243	static inline bool need_pull_dl_task(struct rq rq, struct task_struct prev)
				244	{
				245	return false;
				246	}
				247
				248	static inline int pull_dl_task(struct rq *rq)
				249	{
				250	return 0;
				251	}
				252
				253	static inline void set_post_schedule(struct rq *rq)
				254	{
				255	}
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	256	#endif /* CONFIG_SMP */
				257
/* Forward declarations for functions used before their definitions. */
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				  int flags);
				262
				263	/*
				264	* We are being explicitly informed that a new instance is starting,
				265	* and this means that:
				266	* - the absolute deadline of the entity has to be placed at
				267	* current time + relative deadline;
				268	* - the runtime of the entity has to be set to the maximum value.
				269	*
				270	* The capability of specifying such event is useful whenever a -deadline
				271	* entity wants to (try to!) synchronize its behaviour with the scheduler's
				272	* one, and to (try to!) reconcile itself with its own scheduling
				273	* parameters.
				274	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	275	static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
				276	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	277	{
				278	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				279	struct rq *rq = rq_of_dl_rq(dl_rq);
				280
				281	WARN_ON(!dl_se->dl_new \|\| dl_se->dl_throttled);
				282
				283	/*
				284	* We use the regular wall clock time to set deadlines in the
				285	* future; in fact, we must consider execution overheads (time
				286	* spent on hardirq context, etc.).
				287	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	288	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				289	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	290	dl_se->dl_new = 0;
				291	}
				292
				293	/*
				294	* Pure Earliest Deadline First (EDF) scheduling does not deal with the
				295	* possibility of a entity lasting more than what it declared, and thus
				296	* exhausting its runtime.
				297	*
				298	* Here we are interested in making runtime overrun possible, but we do
				299	* not want a entity which is misbehaving to affect the scheduling of all
				300	* other entities.
				301	* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
				302	* is used, in order to confine each entity within its own bandwidth.
				303	*
				304	* This function deals exactly with that, and ensures that when the runtime
				305	* of a entity is replenished, its deadline is also postponed. That ensures
				306	* the overrunning entity can't interfere with other entity in the system and
				307	* can't make them miss their deadlines. Reasons why this kind of overruns
				308	* could happen are, typically, a entity voluntarily trying to overcome its
xiaofeng.yan	1b09d29	2014-07-07 05:59:04 +0000	[diff] [blame]	309	* runtime, or it just underestimated it during sched_setattr().
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	310	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	311	static void replenish_dl_entity(struct sched_dl_entity *dl_se,
				312	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	313	{
				314	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				315	struct rq *rq = rq_of_dl_rq(dl_rq);
				316
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	317	BUG_ON(pi_se->dl_runtime <= 0);
				318
				319	/*
				320	* This could be the case for a !-dl task that is boosted.
				321	* Just go with full inherited parameters.
				322	*/
				323	if (dl_se->dl_deadline == 0) {
				324	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				325	dl_se->runtime = pi_se->dl_runtime;
				326	}
				327
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	328	/*
				329	* We keep moving the deadline away until we get some
				330	* available runtime for the entity. This ensures correct
				331	* handling of situations where the runtime overrun is
				332	* arbitrary large.
				333	*/
				334	while (dl_se->runtime <= 0) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	335	dl_se->deadline += pi_se->dl_period;
				336	dl_se->runtime += pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	337	}
				338
				339	/*
				340	* At this point, the deadline really should be "in
				341	* the future" with respect to rq->clock. If it's
				342	* not, we are, for some reason, lagging too much!
				343	* Anyway, after having warn userspace abut that,
				344	* we still try to keep the things running by
				345	* resetting the deadline and the budget of the
				346	* entity.
				347	*/
				348	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
John Stultz	c224815	2014-06-04 16:11:41 -0700	[diff] [blame]	349	printk_deferred_once("sched: DL replenish lagged to much\n");
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	350	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				351	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	352	}
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	353
				354	if (dl_se->dl_yielded)
				355	dl_se->dl_yielded = 0;
				356	if (dl_se->dl_throttled)
				357	dl_se->dl_throttled = 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	358	}
				359
				360	/*
				361	* Here we check if --at time t-- an entity (which is probably being
				362	* [re]activated or, in general, enqueued) can use its remaining runtime
				363	* and its current deadline _without_ exceeding the bandwidth it is
				364	* assigned (function returns true if it can't). We are in fact applying
				365	* one of the CBS rules: when a task wakes up, if the residual runtime
				366	* over residual deadline fits within the allocated bandwidth, then we
				367	* can keep the current (absolute) deadline and residual budget without
				368	* disrupting the schedulability of the system. Otherwise, we should
				369	* refill the runtime and set the deadline a period in the future,
				370	* because keeping the current (absolute) deadline of the task would
Dario Faggioli	712e5e3	2014-01-27 12:20:15 +0100	[diff] [blame]	371	* result in breaking guarantees promised to other tasks (refer to
				372	* Documentation/scheduler/sched-deadline.txt for more informations).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	373	*
				374	* This function returns true if:
				375	*
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	376	* runtime / (deadline - t) > dl_runtime / dl_period ,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	377	*
				378	* IOW we can't recycle current parameters.
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	379	*
				380	* Notice that the bandwidth check is done against the period. For
				381	* task with deadline equal to period this is the same of using
				382	* dl_deadline instead of dl_period in the equation above.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	383	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	384	static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
				385	struct sched_dl_entity *pi_se, u64 t)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	386	{
				387	u64 left, right;
				388
				389	/*
				390	* left and right are the two sides of the equation above,
				391	* after a bit of shuffling to use multiplications instead
				392	* of divisions.
				393	*
				394	* Note that none of the time values involved in the two
				395	* multiplications are absolute: dl_deadline and dl_runtime
				396	* are the relative deadline and the maximum runtime of each
				397	* instance, runtime is the runtime left for the last instance
				398	* and (deadline - t), since t is rq->clock, is the time left
				399	* to the (absolute) deadline. Even if overflowing the u64 type
				400	* is very unlikely to occur in both cases, here we scale down
				401	* as we want to avoid that risk at all. Scaling down by 10
				402	* means that we reduce granularity to 1us. We are fine with it,
				403	* since this is only a true/false check and, anyway, thinking
				404	* of anything below microseconds resolution is actually fiction
				405	* (but still we want to give the user that illusion >;).
				406	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	407	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
				408	right = ((dl_se->deadline - t) >> DL_SCALE) *
				409	(pi_se->dl_runtime >> DL_SCALE);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	410
				411	return dl_time_before(right, left);
				412	}
				413
				414	/*
				415	* When a -deadline entity is queued back on the runqueue, its runtime and
				416	* deadline might need updating.
				417	*
				418	* The policy here is that we update the deadline of the entity only if:
				419	* - the current deadline is in the past,
				420	* - using the remaining runtime with the current deadline would make
				421	* the entity exceed its bandwidth.
				422	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	423	static void update_dl_entity(struct sched_dl_entity *dl_se,
				424	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	425	{
				426	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				427	struct rq *rq = rq_of_dl_rq(dl_rq);
				428
				429	/*
				430	* The arrival of a new instance needs special treatment, i.e.,
				431	* the actual scheduling parameters have to be "renewed".
				432	*/
				433	if (dl_se->dl_new) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	434	setup_new_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	435	return;
				436	}
				437
				438	if (dl_time_before(dl_se->deadline, rq_clock(rq)) \|\|
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	439	dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
				440	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				441	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	442	}
				443	}
				444
				445	/*
				446	* If the entity depleted all its runtime, and if we want it to sleep
				447	* while waiting for some new execution time to become available, we
				448	* set the bandwidth enforcement timer to the replenishment instant
				449	* and try to activate it.
				450	*
				451	* Notice that it is important for the caller to know if the timer
				452	* actually started or not (i.e., the replenishment instant is in
				453	* the future or in the past).
				454	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	455	static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	456	{
				457	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				458	struct rq *rq = rq_of_dl_rq(dl_rq);
				459	ktime_t now, act;
				460	ktime_t soft, hard;
				461	unsigned long range;
				462	s64 delta;
				463
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	464	if (boosted)
				465	return 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	466	/*
				467	* We want the timer to fire at the deadline, but considering
				468	* that it is actually coming from rq->clock and not from
				469	* hrtimer's time base reading.
				470	*/
				471	act = ns_to_ktime(dl_se->deadline);
				472	now = hrtimer_cb_get_time(&dl_se->dl_timer);
				473	delta = ktime_to_ns(now) - rq_clock(rq);
				474	act = ktime_add_ns(act, delta);
				475
				476	/*
				477	* If the expiry time already passed, e.g., because the value
				478	* chosen as the deadline is too small, don't even try to
				479	* start the timer in the past!
				480	*/
				481	if (ktime_us_delta(act, now) < 0)
				482	return 0;
				483
				484	hrtimer_set_expires(&dl_se->dl_timer, act);
				485
				486	soft = hrtimer_get_softexpires(&dl_se->dl_timer);
				487	hard = hrtimer_get_expires(&dl_se->dl_timer);
				488	range = ktime_to_ns(ktime_sub(hard, soft));
				489	__hrtimer_start_range_ns(&dl_se->dl_timer, soft,
				490	range, HRTIMER_MODE_ABS, 0);
				491
				492	return hrtimer_active(&dl_se->dl_timer);
				493	}
				494
				495	/*
				496	* This is the bandwidth enforcement timer callback. If here, we know
				497	* a task is not on its dl_rq, since the fact that the timer was running
				498	* means the task is throttled and needs a runtime replenishment.
				499	*
				500	* However, what we actually do depends on the fact the task is active,
				501	* (it is on its rq) or has been removed from there by a call to
				502	* dequeue_task_dl(). In the former case we must issue the runtime
				503	* replenishment and add the task back to the dl_rq; in the latter, we just
				504	* do nothing but clearing dl_throttled, so that runtime and deadline
				505	* updating (and the queueing back to dl_rq) will be done by the
				506	* next call to enqueue_task_dl().
				507	*/
				508	static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
				509	{
				510	struct sched_dl_entity *dl_se = container_of(timer,
				511	struct sched_dl_entity,
				512	dl_timer);
				513	struct task_struct *p = dl_task_of(dl_se);
Peter Zijlstra	3960c8c	2015-02-17 13:22:25 +0100	[diff] [blame]	514	unsigned long flags;
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	515	struct rq *rq;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	516
Peter Zijlstra	3960c8c	2015-02-17 13:22:25 +0100	[diff] [blame]	517	rq = task_rq_lock(current, &flags);
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	518
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	519	/*
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	520	* We need to take care of several possible races here:
				521	*
				522	* - the task might have changed its scheduling policy
				523	* to something different than SCHED_DEADLINE
				524	* - the task might have changed its reservation parameters
				525	* (through sched_setattr())
				526	* - the task might have been boosted by someone else and
				527	* might be in the boosting/deboosting path
				528	*
				529	* In all this cases we bail out, as the task is already
				530	* in the runqueue or is going to be enqueued back anyway.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	531	*/
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	532	if (!dl_task(p) \|\| dl_se->dl_new \|\|
				533	dl_se->dl_boosted \|\| !dl_se->dl_throttled)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	534	goto unlock;
				535
				536	sched_clock_tick();
				537	update_rq_clock(rq);
Kirill Tkhai	a79ec89	2015-02-16 15:38:34 +0300	[diff] [blame]	538
				539	/*
				540	* If the throttle happened during sched-out; like:
				541	*
				542	* schedule()
				543	* deactivate_task()
				544	* dequeue_task_dl()
				545	* update_curr_dl()
				546	* start_dl_timer()
				547	* __dequeue_task_dl()
				548	* prev->on_rq = 0;
				549	*
				550	* We can be both throttled and !queued. Replenish the counter
				551	* but do not enqueue -- wait for our wakeup to do that.
				552	*/
				553	if (!task_on_rq_queued(p)) {
				554	replenish_dl_entity(dl_se, dl_se);
				555	goto unlock;
				556	}
				557
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	558	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
				559	if (dl_task(rq->curr))
				560	check_preempt_curr_dl(rq, p, 0);
				561	else
				562	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	563	#ifdef CONFIG_SMP
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	564	/*
				565	* Queueing this task back might have overloaded rq,
				566	* check if we need to kick someone away.
				567	*/
				568	if (has_pushable_dl_tasks(rq))
				569	push_dl_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	570	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	571	unlock:
Peter Zijlstra	3960c8c	2015-02-17 13:22:25 +0100	[diff] [blame]	572	task_rq_unlock(rq, current, &flags);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	573
				574	return HRTIMER_NORESTART;
				575	}
				576
				577	void init_dl_task_timer(struct sched_dl_entity *dl_se)
				578	{
				579	struct hrtimer *timer = &dl_se->dl_timer;
				580
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	581	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				582	timer->function = dl_task_timer;
				583	}
				584
				585	static
				586	int dl_runtime_exceeded(struct rq rq, struct sched_dl_entity dl_se)
				587	{
Luca Abeni	269ad80	2014-12-17 11:50:32 +0100	[diff] [blame]	588	return (dl_se->runtime <= 0);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	589	}
				590
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	591	extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
				592
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	593	/*
				594	* Update the current task's runtime statistics (provided it is still
				595	* a -deadline task and has not been removed from the dl_rq).
				596	*/
				597	static void update_curr_dl(struct rq *rq)
				598	{
				599	struct task_struct *curr = rq->curr;
				600	struct sched_dl_entity *dl_se = &curr->dl;
				601	u64 delta_exec;
				602
				603	if (!dl_task(curr) \|\| !on_dl_rq(dl_se))
				604	return;
				605
				606	/*
				607	* Consumed budget is computed considering the time as
				608	* observed by schedulable tasks (excluding time spent
				609	* in hardirq context, etc.). Deadlines are instead
				610	* computed using hard walltime. This seems to be the more
				611	* natural solution, but the full ramifications of this
				612	* approach need further study.
				613	*/
				614	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	615	if (unlikely((s64)delta_exec <= 0))
				616	return;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	617
				618	schedstat_set(curr->se.statistics.exec_max,
				619	max(curr->se.statistics.exec_max, delta_exec));
				620
				621	curr->se.sum_exec_runtime += delta_exec;
				622	account_group_exec_runtime(curr, delta_exec);
				623
				624	curr->se.exec_start = rq_clock_task(rq);
				625	cpuacct_charge(curr, delta_exec);
				626
Dario Faggioli	239be4a	2013-11-07 14:43:39 +0100	[diff] [blame]	627	sched_rt_avg_update(rq, delta_exec);
				628
Wanpeng Li	8049688	2014-10-31 06:39:32 +0800	[diff] [blame]	629	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	630	if (dl_runtime_exceeded(rq, dl_se)) {
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	631	dl_se->dl_throttled = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	632	__dequeue_task_dl(rq, curr, 0);
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	633	if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	634	enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
				635
				636	if (!is_leftmost(curr, &rq->dl))
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	637	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	638	}
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	639
				640	/*
				641	* Because -- for now -- we share the rt bandwidth, we need to
				642	* account our runtime there too, otherwise actual rt tasks
				643	* would be able to exceed the shared quota.
				644	*
				645	* Account to the root rt group for now.
				646	*
				647	* The solution we're working towards is having the RT groups scheduled
				648	* using deadline servers -- however there's a few nasties to figure
				649	* out before that can happen.
				650	*/
				651	if (rt_bandwidth_enabled()) {
				652	struct rt_rq *rt_rq = &rq->rt;
				653
				654	raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	655	/*
				656	* We'll let actual RT tasks worry about the overflow here, we
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	657	* have our own CBS to keep us inline; only account when RT
				658	* bandwidth is relevant.
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	659	*/
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	660	if (sched_rt_bandwidth_account(rt_rq))
				661	rt_rq->rt_time += delta_exec;
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	662	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				663	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	664	}
				665
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	666	#ifdef CONFIG_SMP
				667
				668	static struct task_struct pick_next_earliest_dl_task(struct rq rq, int cpu);
				669
				670	static inline u64 next_deadline(struct rq *rq)
				671	{
				672	struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
				673
				674	if (next && dl_prio(next->prio))
				675	return next->dl.deadline;
				676	else
				677	return 0;
				678	}
				679
				680	static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				681	{
				682	struct rq *rq = rq_of_dl_rq(dl_rq);
				683
				684	if (dl_rq->earliest_dl.curr == 0 \|\|
				685	dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
				686	/*
				687	* If the dl_rq had no -deadline tasks, or if the new task
				688	* has shorter deadline than the current one on dl_rq, we
				689	* know that the previous earliest becomes our next earliest,
				690	* as the new task becomes the earliest itself.
				691	*/
				692	dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
				693	dl_rq->earliest_dl.curr = deadline;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	694	cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	695	} else if (dl_rq->earliest_dl.next == 0 \|\|
				696	dl_time_before(deadline, dl_rq->earliest_dl.next)) {
				697	/*
				698	* On the other hand, if the new -deadline task has a
				699	* a later deadline than the earliest one on dl_rq, but
				700	* it is earlier than the next (if any), we must
				701	* recompute the next-earliest.
				702	*/
				703	dl_rq->earliest_dl.next = next_deadline(rq);
				704	}
				705	}
				706
				707	static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				708	{
				709	struct rq *rq = rq_of_dl_rq(dl_rq);
				710
				711	/*
				712	* Since we may have removed our earliest (and/or next earliest)
				713	* task we must recompute them.
				714	*/
				715	if (!dl_rq->dl_nr_running) {
				716	dl_rq->earliest_dl.curr = 0;
				717	dl_rq->earliest_dl.next = 0;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	718	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	719	} else {
				720	struct rb_node *leftmost = dl_rq->rb_leftmost;
				721	struct sched_dl_entity *entry;
				722
				723	entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
				724	dl_rq->earliest_dl.curr = entry->deadline;
				725	dl_rq->earliest_dl.next = next_deadline(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	726	cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	727	}
				728	}
				729
				730	#else
				731
				732	static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				733	static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				734
				735	#endif /* CONFIG_SMP */
				736
				737	static inline
				738	void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				739	{
				740	int prio = dl_task_of(dl_se)->prio;
				741	u64 deadline = dl_se->deadline;
				742
				743	WARN_ON(!dl_prio(prio));
				744	dl_rq->dl_nr_running++;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	745	add_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	746
				747	inc_dl_deadline(dl_rq, deadline);
				748	inc_dl_migration(dl_se, dl_rq);
				749	}
				750
				751	static inline
				752	void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				753	{
				754	int prio = dl_task_of(dl_se)->prio;
				755
				756	WARN_ON(!dl_prio(prio));
				757	WARN_ON(!dl_rq->dl_nr_running);
				758	dl_rq->dl_nr_running--;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	759	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	760
				761	dec_dl_deadline(dl_rq, dl_se->deadline);
				762	dec_dl_migration(dl_se, dl_rq);
				763	}
				764
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	765	static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
				766	{
				767	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				768	struct rb_node **link = &dl_rq->rb_root.rb_node;
				769	struct rb_node *parent = NULL;
				770	struct sched_dl_entity *entry;
				771	int leftmost = 1;
				772
				773	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
				774
				775	while (*link) {
				776	parent = *link;
				777	entry = rb_entry(parent, struct sched_dl_entity, rb_node);
				778	if (dl_time_before(dl_se->deadline, entry->deadline))
				779	link = &parent->rb_left;
				780	else {
				781	link = &parent->rb_right;
				782	leftmost = 0;
				783	}
				784	}
				785
				786	if (leftmost)
				787	dl_rq->rb_leftmost = &dl_se->rb_node;
				788
				789	rb_link_node(&dl_se->rb_node, parent, link);
				790	rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
				791
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	792	inc_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	793	}
				794
				795	static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
				796	{
				797	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				798
				799	if (RB_EMPTY_NODE(&dl_se->rb_node))
				800	return;
				801
				802	if (dl_rq->rb_leftmost == &dl_se->rb_node) {
				803	struct rb_node *next_node;
				804
				805	next_node = rb_next(&dl_se->rb_node);
				806	dl_rq->rb_leftmost = next_node;
				807	}
				808
				809	rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
				810	RB_CLEAR_NODE(&dl_se->rb_node);
				811
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	812	dec_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	813	}
				814
				815	static void
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	816	enqueue_dl_entity(struct sched_dl_entity *dl_se,
				817	struct sched_dl_entity *pi_se, int flags)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	818	{
				819	BUG_ON(on_dl_rq(dl_se));
				820
				821	/*
				822	* If this is a wakeup or a new instance, the scheduling
				823	* parameters of the task might need updating. Otherwise,
				824	* we want a replenishment of its runtime.
				825	*/
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	826	if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	827	update_dl_entity(dl_se, pi_se);
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	828	else if (flags & ENQUEUE_REPLENISH)
				829	replenish_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	830
				831	__enqueue_dl_entity(dl_se);
				832	}
				833
				834	static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
				835	{
				836	__dequeue_dl_entity(dl_se);
				837	}
				838
				839	static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				840	{
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	841	struct task_struct *pi_task = rt_mutex_get_top_task(p);
				842	struct sched_dl_entity *pi_se = &p->dl;
				843
				844	/*
				845	* Use the scheduling parameters of the top pi-waiter
				846	* task if we have one and its (relative) deadline is
				847	* smaller than our one... OTW we keep our runtime and
				848	* deadline.
				849	*/
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	850	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	851	pi_se = &pi_task->dl;
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	852	} else if (!dl_prio(p->normal_prio)) {
				853	/*
				854	* Special case in which we have a !SCHED_DEADLINE task
				855	* that is going to be deboosted, but exceeds its
				856	* runtime while doing so. No point in replenishing
				857	* it, as it's going to return back to its original
				858	* scheduling class after this.
				859	*/
				860	BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
				861	return;
				862	}
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	863
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	864	/*
				865	* If p is throttled, we do nothing. In fact, if it exhausted
				866	* its budget it needs a replenishment and, since it now is on
				867	* its rq, the bandwidth timer callback (which clearly has not
				868	* run yet) will take care of this.
				869	*/
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	870	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	871	return;
				872
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	873	enqueue_dl_entity(&p->dl, pi_se, flags);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	874
				875	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
				876	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	877	}
				878
				879	static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				880	{
				881	dequeue_dl_entity(&p->dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	882	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	883	}
				884
				885	static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				886	{
				887	update_curr_dl(rq);
				888	__dequeue_task_dl(rq, p, flags);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	889	}
				890
				891	/*
				892	* Yield task semantic for -deadline tasks is:
				893	*
				894	* get off from the CPU until our next instance, with
				895	* a new runtime. This is of little use now, since we
				896	* don't have a bandwidth reclaiming mechanism. Anyway,
				897	* bandwidth reclaiming is planned for the future, and
				898	* yield_task_dl will indicate that some spare budget
				899	* is available for other task instances to use it.
				900	*/
				901	static void yield_task_dl(struct rq *rq)
				902	{
				903	struct task_struct *p = rq->curr;
				904
				905	/*
				906	* We make the task go to sleep until its current deadline by
				907	* forcing its runtime to zero. This way, update_curr_dl() stops
				908	* it and the bandwidth timer will wake it up and will give it
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	909	* new scheduling parameters (thanks to dl_yielded=1).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	910	*/
				911	if (p->dl.runtime > 0) {
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	912	rq->curr->dl.dl_yielded = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	913	p->dl.runtime = 0;
				914	}
Kirill Tkhai	6f1607f	2015-02-04 12:09:32 +0300	[diff] [blame]	915	update_rq_clock(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	916	update_curr_dl(rq);
Wanpeng Li	44fb085	2015-03-10 12:20:00 +0800	[diff] [blame]	917	/*
				918	* Tell update_rq_clock() that we've just updated,
				919	* so we don't do microscopic update in schedule()
				920	* and double the fastpath cost.
				921	*/
				922	rq_clock_skip_update(rq, true);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	923	}
				924
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	925	#ifdef CONFIG_SMP
				926
				927	static int find_later_rq(struct task_struct *task);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	928
				929	static int
				930	select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
				931	{
				932	struct task_struct *curr;
				933	struct rq *rq;
				934
Wanpeng Li	1d7e974	2014-10-14 10:22:39 +0800	[diff] [blame]	935	if (sd_flag != SD_BALANCE_WAKE)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	936	goto out;
				937
				938	rq = cpu_rq(cpu);
				939
				940	rcu_read_lock();
				941	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
				942
				943	/*
				944	* If we are dealing with a -deadline task, we must
				945	* decide where to wake it up.
				946	* If it has a later deadline and the current task
				947	* on this rq can't move (provided the waking task
				948	* can!) we prefer to send it somewhere else. On the
				949	* other hand, if it has a shorter deadline, we
				950	* try to make it stay here, it might be important.
				951	*/
				952	if (unlikely(dl_task(curr)) &&
				953	(curr->nr_cpus_allowed < 2 ||
				954	!dl_entity_preempt(&p->dl, &curr->dl)) &&
				955	(p->nr_cpus_allowed > 1)) {
				956	int target = find_later_rq(p);
				957
				958	if (target != -1)
				959	cpu = target;
				960	}
				961	rcu_read_unlock();
				962
				963	out:
				964	return cpu;
				965	}
				966
				967	static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
				968	{
				969	/*
				970	* Current can't be migrated, useless to reschedule,
				971	* let's hope p can move out.
				972	*/
				973	if (rq->curr->nr_cpus_allowed == 1 ||
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	974	cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	975	return;
				976
				977	/*
				978	* p is migratable, so let's not schedule it and
				979	* see if it is pushed or pulled somewhere else.
				980	*/
				981	if (p->nr_cpus_allowed != 1 &&
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	982	cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	983	return;
				984
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	985	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	986	}
				987
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	988	static int pull_dl_task(struct rq *this_rq);
				989
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	990	#endif /* CONFIG_SMP */
				991
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	992	/*
				993	* Only called when both the current and waking task are -deadline
				994	* tasks.
				995	*/
				996	static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				997	int flags)
				998	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	999	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1000	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1001	return;
				1002	}
				1003
				1004	#ifdef CONFIG_SMP
				1005	/*
				1006	* In the unlikely case current and p have the same deadline
				1007	* let us try to decide what's the best thing to do...
				1008	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1009	if ((p->dl.deadline == rq->curr->dl.deadline) &&
				1010	!test_tsk_need_resched(rq->curr))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1011	check_preempt_equal_dl(rq, p);
				1012	#endif /* CONFIG_SMP */
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1013	}
				1014
				1015	#ifdef CONFIG_SCHED_HRTICK
				1016	static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
				1017	{
xiaofeng.yan	177ef2a	2014-08-26 03:15:41 +0000	[diff] [blame]	1018	hrtick_start(rq, p->dl.runtime);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1019	}
Wanpeng Li	36ce988	2014-11-11 09:52:26 +0800	[diff] [blame]	1020	#else /* !CONFIG_SCHED_HRTICK */
				1021	static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
				1022	{
				1023	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1024	#endif
				1025
				1026	static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
				1027	struct dl_rq *dl_rq)
				1028	{
				1029	struct rb_node *left = dl_rq->rb_leftmost;
				1030
				1031	if (!left)
				1032	return NULL;
				1033
				1034	return rb_entry(left, struct sched_dl_entity, rb_node);
				1035	}
				1036
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1037	struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1038	{
				1039	struct sched_dl_entity *dl_se;
				1040	struct task_struct *p;
				1041	struct dl_rq *dl_rq;
				1042
				1043	dl_rq = &rq->dl;
				1044
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1045	if (need_pull_dl_task(rq, prev)) {
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1046	pull_dl_task(rq);
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1047	/*
				1048	* pull_dl_task() can drop (and re-acquire) rq->lock; this
				1049	* means a stop task can slip in, in which case we need to
				1050	* re-start task selection.
				1051	*/
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1052	if (rq->stop && task_on_rq_queued(rq->stop))
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1053	return RETRY_TASK;
				1054	}
				1055
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	1056	/*
				1057	* When prev is DL, we may throttle it in put_prev_task().
				1058	* So, we update time before we check for dl_nr_running.
				1059	*/
				1060	if (prev->sched_class == &dl_sched_class)
				1061	update_curr_dl(rq);
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1062
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1063	if (unlikely(!dl_rq->dl_nr_running))
				1064	return NULL;
				1065
Peter Zijlstra	3f1d2a3	2014-02-12 10:49:30 +0100	[diff] [blame]	1066	put_prev_task(rq, prev);
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1067
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1068	dl_se = pick_next_dl_entity(rq, dl_rq);
				1069	BUG_ON(!dl_se);
				1070
				1071	p = dl_task_of(dl_se);
				1072	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1073
				1074	/* Running task will never be pushed. */
Juri Lelli	7136265	2014-01-14 12:03:51 +0100	[diff] [blame]	1075	dequeue_pushable_dl_task(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1076
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1077	if (hrtick_enabled(rq))
				1078	start_hrtick_dl(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1079
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	1080	set_post_schedule(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1081
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1082	return p;
				1083	}
				1084
				1085	static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
				1086	{
				1087	update_curr_dl(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1088
				1089	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
				1090	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1091	}
				1092
				1093	static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
				1094	{
				1095	update_curr_dl(rq);
				1096
Wanpeng Li	a7bebf4	2014-11-26 08:44:01 +0800	[diff] [blame]	1097	/*
				1098	* Even when we have runtime, update_curr_dl() might have resulted in us
				1099	* not being the leftmost task anymore. In that case NEED_RESCHED will
				1100	* be set and schedule() will start a new hrtick for the next task.
				1101	*/
				1102	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
				1103	is_leftmost(p, &rq->dl))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1104	start_hrtick_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1105	}
				1106
				1107	static void task_fork_dl(struct task_struct *p)
				1108	{
				1109	/*
				1110	* SCHED_DEADLINE tasks cannot fork and this is achieved through
				1111	* sched_fork()
				1112	*/
				1113	}
				1114
				1115	static void task_dead_dl(struct task_struct *p)
				1116	{
				1117	struct hrtimer *timer = &p->dl.dl_timer;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1118	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
				1119
				1120	/*
				1121	* Since we are TASK_DEAD we won't slip out of the domain!
				1122	*/
				1123	raw_spin_lock_irq(&dl_b->lock);
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1124	/* XXX we should retain the bw until 0-lag */
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1125	dl_b->total_bw -= p->dl.dl_bw;
				1126	raw_spin_unlock_irq(&dl_b->lock);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1127
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	1128	hrtimer_cancel(timer);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1129	}
				1130
				1131	static void set_curr_task_dl(struct rq *rq)
				1132	{
				1133	struct task_struct *p = rq->curr;
				1134
				1135	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1136
				1137	/* You can't push away the running task */
				1138	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1139	}
				1140
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1141	#ifdef CONFIG_SMP
				1142
				1143	/* Only try algorithms three times */
				1144	#define DL_MAX_TRIES 3
				1145
				1146	static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
				1147	{
				1148	if (!task_running(rq, p) &&
Kirill Tkhai	1ba93d4	2014-09-12 17:42:20 +0400	[diff] [blame]	1149	cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1150	return 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1151	return 0;
				1152	}
				1153
				1154	/* Returns the second earliest -deadline task, NULL otherwise */
				1155	static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)
				1156	{
				1157	struct rb_node *next_node = rq->dl.rb_leftmost;
				1158	struct sched_dl_entity *dl_se;
				1159	struct task_struct *p = NULL;
				1160
				1161	next_node:
				1162	next_node = rb_next(next_node);
				1163	if (next_node) {
				1164	dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
				1165	p = dl_task_of(dl_se);
				1166
				1167	if (pick_dl_task(rq, p, cpu))
				1168	return p;
				1169
				1170	goto next_node;
				1171	}
				1172
				1173	return NULL;
				1174	}
				1175
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1176	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
				1177
				1178	static int find_later_rq(struct task_struct *task)
				1179	{
				1180	struct sched_domain *sd;
Christoph Lameter	4ba2968	2014-08-26 19:12:21 -0500	[diff] [blame]	1181	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1182	int this_cpu = smp_processor_id();
				1183	int best_cpu, cpu = task_cpu(task);
				1184
				1185	/* Make sure the mask is initialized first */
				1186	if (unlikely(!later_mask))
				1187	return -1;
				1188
				1189	if (task->nr_cpus_allowed == 1)
				1190	return -1;
				1191
Juri Lelli	91ec677	2014-09-19 10:22:41 +0100	[diff] [blame]	1192	/*
				1193	* We have to consider system topology and task affinity
				1194	* first, then we can look for a suitable cpu.
				1195	*/
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1196	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
				1197	task, later_mask);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1198	if (best_cpu == -1)
				1199	return -1;
				1200
				1201	/*
				1202	* If we are here, some target has been found,
				1203	* the most suitable of which is cached in best_cpu.
				1204	* This is, among the runqueues where the current tasks
				1205	* have later deadlines than the task's one, the rq
				1206	* with the latest possible one.
				1207	*
				1208	* Now we check how well this matches with task's
				1209	* affinity and system topology.
				1210	*
				1211	* The last cpu where the task ran is our first
				1212	* guess, since it is most likely cache-hot there.
				1213	*/
				1214	if (cpumask_test_cpu(cpu, later_mask))
				1215	return cpu;
				1216	/*
				1217	* Check if this_cpu is to be skipped (i.e., it is
				1218	* not in the mask) or not.
				1219	*/
				1220	if (!cpumask_test_cpu(this_cpu, later_mask))
				1221	this_cpu = -1;
				1222
				1223	rcu_read_lock();
				1224	for_each_domain(cpu, sd) {
				1225	if (sd->flags & SD_WAKE_AFFINE) {
				1226
				1227	/*
				1228	* If possible, preempting this_cpu is
				1229	* cheaper than migrating.
				1230	*/
				1231	if (this_cpu != -1 &&
				1232	cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				1233	rcu_read_unlock();
				1234	return this_cpu;
				1235	}
				1236
				1237	/*
				1238	* Last chance: if best_cpu is valid and is
				1239	* in the mask, that becomes our choice.
				1240	*/
				1241	if (best_cpu < nr_cpu_ids &&
				1242	cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
				1243	rcu_read_unlock();
				1244	return best_cpu;
				1245	}
				1246	}
				1247	}
				1248	rcu_read_unlock();
				1249
				1250	/*
				1251	* At this point, all our guesses failed, we just return
				1252	* 'something', and let the caller sort the things out.
				1253	*/
				1254	if (this_cpu != -1)
				1255	return this_cpu;
				1256
				1257	cpu = cpumask_any(later_mask);
				1258	if (cpu < nr_cpu_ids)
				1259	return cpu;
				1260
				1261	return -1;
				1262	}
				1263
				1264	/* Locks the rq it finds */
				1265	static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
				1266	{
				1267	struct rq *later_rq = NULL;
				1268	int tries;
				1269	int cpu;
				1270
				1271	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
				1272	cpu = find_later_rq(task);
				1273
				1274	if ((cpu == -1) || (cpu == rq->cpu))
				1275	break;
				1276
				1277	later_rq = cpu_rq(cpu);
				1278
				1279	/* Retry if something changed. */
				1280	if (double_lock_balance(rq, later_rq)) {
				1281	if (unlikely(task_rq(task) != rq ||
				1282	!cpumask_test_cpu(later_rq->cpu,
				1283	&task->cpus_allowed) ||
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1284	task_running(rq, task) ||
				1285	!task_on_rq_queued(task))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1286	double_unlock_balance(rq, later_rq);
				1287	later_rq = NULL;
				1288	break;
				1289	}
				1290	}
				1291
				1292	/*
				1293	* If the rq we found has no -deadline task, or
				1294	* its earliest one has a later deadline than our
				1295	* task, the rq is a good one.
				1296	*/
				1297	if (!later_rq->dl.dl_nr_running \|\|
				1298	dl_time_before(task->dl.deadline,
				1299	later_rq->dl.earliest_dl.curr))
				1300	break;
				1301
				1302	/* Otherwise we try again. */
				1303	double_unlock_balance(rq, later_rq);
				1304	later_rq = NULL;
				1305	}
				1306
				1307	return later_rq;
				1308	}
				1309
				1310	static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
				1311	{
				1312	struct task_struct *p;
				1313
				1314	if (!has_pushable_dl_tasks(rq))
				1315	return NULL;
				1316
				1317	p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
				1318	struct task_struct, pushable_dl_tasks);
				1319
				1320	BUG_ON(rq->cpu != task_cpu(p));
				1321	BUG_ON(task_current(rq, p));
				1322	BUG_ON(p->nr_cpus_allowed <= 1);
				1323
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1324	BUG_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1325	BUG_ON(!dl_task(p));
				1326
				1327	return p;
				1328	}
				1329
				1330	/*
				1331	* See if the non running -deadline tasks on this rq
				1332	* can be sent to some other CPU where they can preempt
				1333	* and start executing.
				1334	*/
				1335	static int push_dl_task(struct rq *rq)
				1336	{
				1337	struct task_struct *next_task;
				1338	struct rq *later_rq;
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1339	int ret = 0;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1340
				1341	if (!rq->dl.overloaded)
				1342	return 0;
				1343
				1344	next_task = pick_next_pushable_dl_task(rq);
				1345	if (!next_task)
				1346	return 0;
				1347
				1348	retry:
				1349	if (unlikely(next_task == rq->curr)) {
				1350	WARN_ON(1);
				1351	return 0;
				1352	}
				1353
				1354	/*
				1355	* If next_task preempts rq->curr, and rq->curr
				1356	* can move away, it makes sense to just reschedule
				1357	* without going further in pushing next_task.
				1358	*/
				1359	if (dl_task(rq->curr) &&
				1360	dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
				1361	rq->curr->nr_cpus_allowed > 1) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1362	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1363	return 0;
				1364	}
				1365
				1366	/* We might release rq lock */
				1367	get_task_struct(next_task);
				1368
				1369	/* Will lock the rq it'll find */
				1370	later_rq = find_lock_later_rq(next_task, rq);
				1371	if (!later_rq) {
				1372	struct task_struct *task;
				1373
				1374	/*
				1375	* We must check all this again, since
				1376	* find_lock_later_rq releases rq->lock and it is
				1377	* then possible that next_task has migrated.
				1378	*/
				1379	task = pick_next_pushable_dl_task(rq);
				1380	if (task_cpu(next_task) == rq->cpu && task == next_task) {
				1381	/*
				1382	* The task is still there. We don't try
				1383	* again, some other cpu will pull it when ready.
				1384	*/
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1385	goto out;
				1386	}
				1387
				1388	if (!task)
				1389	/* No more tasks */
				1390	goto out;
				1391
				1392	put_task_struct(next_task);
				1393	next_task = task;
				1394	goto retry;
				1395	}
				1396
				1397	deactivate_task(rq, next_task, 0);
				1398	set_task_cpu(next_task, later_rq->cpu);
				1399	activate_task(later_rq, next_task, 0);
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1400	ret = 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1401
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1402	resched_curr(later_rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1403
				1404	double_unlock_balance(rq, later_rq);
				1405
				1406	out:
				1407	put_task_struct(next_task);
				1408
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1409	return ret;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1410	}
				1411
				1412	static void push_dl_tasks(struct rq *rq)
				1413	{
				1414	/* Terminates as it moves a -deadline task */
				1415	while (push_dl_task(rq))
				1416	;
				1417	}
				1418
				1419	static int pull_dl_task(struct rq *this_rq)
				1420	{
				1421	int this_cpu = this_rq->cpu, ret = 0, cpu;
				1422	struct task_struct *p;
				1423	struct rq *src_rq;
				1424	u64 dmin = LONG_MAX;
				1425
				1426	if (likely(!dl_overloaded(this_rq)))
				1427	return 0;
				1428
				1429	/*
				1430	* Match the barrier from dl_set_overloaded; this guarantees that if we
				1431	* see overloaded we must also see the dlo_mask bit.
				1432	*/
				1433	smp_rmb();
				1434
				1435	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
				1436	if (this_cpu == cpu)
				1437	continue;
				1438
				1439	src_rq = cpu_rq(cpu);
				1440
				1441	/*
				1442	* It looks racy, and it is! However, as in sched_rt.c,
				1443	* we are fine with this.
				1444	*/
				1445	if (this_rq->dl.dl_nr_running &&
				1446	dl_time_before(this_rq->dl.earliest_dl.curr,
				1447	src_rq->dl.earliest_dl.next))
				1448	continue;
				1449
				1450	/* Might drop this_rq->lock */
				1451	double_lock_balance(this_rq, src_rq);
				1452
				1453	/*
				1454	* If there are no more pullable tasks on the
				1455	* rq, we're done with it.
				1456	*/
				1457	if (src_rq->dl.dl_nr_running <= 1)
				1458	goto skip;
				1459
				1460	p = pick_next_earliest_dl_task(src_rq, this_cpu);
				1461
				1462	/*
				1463	* We found a task to be pulled if:
				1464	* - it preempts our current (if there's one),
				1465	* - it will preempt the last one we pulled (if any).
				1466	*/
				1467	if (p && dl_time_before(p->dl.deadline, dmin) &&
				1468	(!this_rq->dl.dl_nr_running ||
				1469	dl_time_before(p->dl.deadline,
				1470	this_rq->dl.earliest_dl.curr))) {
				1471	WARN_ON(p == src_rq->curr);
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1472	WARN_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1473
				1474	/*
				1475	* Skip p if it has an earlier deadline than the
				1476	* current task of its runqueue; pull it otherwise.
				1477	*/
				1478	if (dl_time_before(p->dl.deadline,
				1479	src_rq->curr->dl.deadline))
				1480	goto skip;
				1481
				1482	ret = 1;
				1483
				1484	deactivate_task(src_rq, p, 0);
				1485	set_task_cpu(p, this_cpu);
				1486	activate_task(this_rq, p, 0);
				1487	dmin = p->dl.deadline;
				1488
				1489	/* Is there any other task even earlier? */
				1490	}
				1491	skip:
				1492	double_unlock_balance(this_rq, src_rq);
				1493	}
				1494
				1495	return ret;
				1496	}
				1497
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1498	static void post_schedule_dl(struct rq *rq)
				1499	{
				1500	push_dl_tasks(rq);
				1501	}
				1502
				1503	/*
				1504	* Since the task is not running and a reschedule is not going to happen
				1505	* anytime soon on its runqueue, we try pushing it away now.
				1506	*/
				1507	static void task_woken_dl(struct rq *rq, struct task_struct *p)
				1508	{
				1509	if (!task_running(rq, p) &&
				1510	!test_tsk_need_resched(rq->curr) &&
				1511	has_pushable_dl_tasks(rq) &&
				1512	p->nr_cpus_allowed > 1 &&
				1513	dl_task(rq->curr) &&
				1514	(rq->curr->nr_cpus_allowed < 2 \|\|
Wanpeng Li	6b0a563	2014-10-31 06:39:34 +0800	[diff] [blame]	1515	!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1516	push_dl_tasks(rq);
				1517	}
				1518	}
				1519
				1520	static void set_cpus_allowed_dl(struct task_struct *p,
				1521	const struct cpumask *new_mask)
				1522	{
				1523	struct rq *rq;
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1524	struct root_domain *src_rd;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1525	int weight;
				1526
				1527	BUG_ON(!dl_task(p));
				1528
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1529	rq = task_rq(p);
				1530	src_rd = rq->rd;
				1531	/*
				1532	* Migrating a SCHED_DEADLINE task between exclusive
				1533	* cpusets (different root_domains) entails a bandwidth
				1534	* update. We already made space for us in the destination
				1535	* domain (see cpuset_can_attach()).
				1536	*/
				1537	if (!cpumask_intersects(src_rd->span, new_mask)) {
				1538	struct dl_bw *src_dl_b;
				1539
				1540	src_dl_b = dl_bw_of(cpu_of(rq));
				1541	/*
				1542	* We now free resources of the root_domain we are migrating
				1543	* off. In the worst case, sched_setattr() may temporary fail
				1544	* until we complete the update.
				1545	*/
				1546	raw_spin_lock(&src_dl_b->lock);
				1547	__dl_clear(src_dl_b, p->dl.dl_bw);
				1548	raw_spin_unlock(&src_dl_b->lock);
				1549	}
				1550
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1551	/*
				1552	* Update only if the task is actually running (i.e.,
				1553	* it is on the rq AND it is not throttled).
				1554	*/
				1555	if (!on_dl_rq(&p->dl))
				1556	return;
				1557
				1558	weight = cpumask_weight(new_mask);
				1559
				1560	/*
				1561	* Only update if the process changes its state from whether it
				1562	* can migrate or not.
				1563	*/
				1564	if ((p->nr_cpus_allowed > 1) == (weight > 1))
				1565	return;
				1566
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1567	/*
				1568	* The process used to be able to migrate OR it can now migrate
				1569	*/
				1570	if (weight <= 1) {
				1571	if (!task_current(rq, p))
				1572	dequeue_pushable_dl_task(rq, p);
				1573	BUG_ON(!rq->dl.dl_nr_migratory);
				1574	rq->dl.dl_nr_migratory--;
				1575	} else {
				1576	if (!task_current(rq, p))
				1577	enqueue_pushable_dl_task(rq, p);
				1578	rq->dl.dl_nr_migratory++;
				1579	}
				1580
				1581	update_dl_migration(&rq->dl);
				1582	}
				1583
				1584	/* Assumes rq->lock is held */
				1585	static void rq_online_dl(struct rq *rq)
				1586	{
				1587	if (rq->dl.overloaded)
				1588	dl_set_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1589
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1590	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1591	if (rq->dl.dl_nr_running > 0)
				1592	cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1593	}
				1594
				1595	/* Assumes rq->lock is held */
				1596	static void rq_offline_dl(struct rq *rq)
				1597	{
				1598	if (rq->dl.overloaded)
				1599	dl_clear_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1600
				1601	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1602	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1603	}
				1604
				1605	void init_sched_dl_class(void)
				1606	{
				1607	unsigned int i;
				1608
				1609	for_each_possible_cpu(i)
				1610	zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
				1611	GFP_KERNEL, cpu_to_node(i));
				1612	}
				1613
				1614	#endif /* CONFIG_SMP */
				1615
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1616	/*
				1617	* Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
				1618	*/
				1619	static void cancel_dl_timer(struct rq rq, struct task_struct p)
				1620	{
				1621	struct hrtimer *dl_timer = &p->dl.dl_timer;
				1622
				1623	/* Nobody will change task's class if pi_lock is held */
				1624	lockdep_assert_held(&p->pi_lock);
				1625
				1626	if (hrtimer_active(dl_timer)) {
				1627	int ret = hrtimer_try_to_cancel(dl_timer);
				1628
				1629	if (unlikely(ret == -1)) {
				1630	/*
				1631	* Note, p may migrate OR new deadline tasks
				1632	* may appear in rq when we are unlocking it.
				1633	* A caller of us must be fine with that.
				1634	*/
				1635	raw_spin_unlock(&rq->lock);
				1636	hrtimer_cancel(dl_timer);
				1637	raw_spin_lock(&rq->lock);
				1638	}
				1639	}
				1640	}
				1641
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1642	static void switched_from_dl(struct rq rq, struct task_struct p)
				1643	{
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1644	/* XXX we should retain the bw until 0-lag */
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1645	cancel_dl_timer(rq, p);
Juri Lelli	a5e7be3	2014-09-19 10:22:39 +0100	[diff] [blame]	1646	__dl_clear_params(p);
				1647
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1648	/*
				1649	* Since this might be the only -deadline task on the rq,
				1650	* this is the right place to try to pull some other one
				1651	* from an overloaded cpu, if any.
				1652	*/
Wanpeng Li	cd66091	2014-10-31 06:39:35 +0800	[diff] [blame]	1653	if (!task_on_rq_queued(p) \|\| rq->dl.dl_nr_running)
				1654	return;
				1655
				1656	if (pull_dl_task(rq))
				1657	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1658	}
				1659
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1660	/*
				1661	* When switching to -deadline, we may overload the rq, then
				1662	* we try to push someone off, if possible.
				1663	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1664	static void switched_to_dl(struct rq rq, struct task_struct p)
				1665	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1666	int check_resched = 1;
				1667
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1668	if (task_on_rq_queued(p) && rq->curr != p) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1669	#ifdef CONFIG_SMP
Wanpeng Li	d9aade7a	2014-10-22 08:36:43 +0800	[diff] [blame]	1670	if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
				1671	push_dl_task(rq) && rq != task_rq(p))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1672	/* Only reschedule if pushing failed */
				1673	check_resched = 0;
				1674	#endif /* CONFIG_SMP */
Kirill Tkhai	f3a7e1a	2014-10-21 20:35:56 +0400	[diff] [blame]	1675	if (check_resched) {
				1676	if (dl_task(rq->curr))
				1677	check_preempt_curr_dl(rq, p, 0);
				1678	else
				1679	resched_curr(rq);
				1680	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1681	}
				1682	}
				1683
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1684	/*
				1685	* If the scheduling parameters of a -deadline task changed,
				1686	* a push or pull operation might be needed.
				1687	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1688	static void prio_changed_dl(struct rq rq, struct task_struct p,
				1689	int oldprio)
				1690	{
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1691	if (task_on_rq_queued(p) \|\| rq->curr == p) {
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1692	#ifdef CONFIG_SMP
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1693	/*
				1694	* This might be too much, but unfortunately
				1695	* we don't have the old deadline value, and
				1696	* we can't argue if the task is increasing
				1697	* or lowering its prio, so...
				1698	*/
				1699	if (!rq->dl.overloaded)
				1700	pull_dl_task(rq);
				1701
				1702	/*
				1703	* If we now have a earlier deadline task than p,
				1704	* then reschedule, provided p is still on this
				1705	* runqueue.
				1706	*/
				1707	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
				1708	rq->curr == p)
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1709	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1710	#else
				1711	/*
				1712	* Again, we don't know if p has a earlier
				1713	* or later deadline, so let's blindly set a
				1714	* (maybe not needed) rescheduling point.
				1715	*/
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1716	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1717	#endif /* CONFIG_SMP */
				1718	} else
				1719	switched_to_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1720	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1721
				1722	const struct sched_class dl_sched_class = {
				1723	.next = &rt_sched_class,
				1724	.enqueue_task = enqueue_task_dl,
				1725	.dequeue_task = dequeue_task_dl,
				1726	.yield_task = yield_task_dl,
				1727
				1728	.check_preempt_curr = check_preempt_curr_dl,
				1729
				1730	.pick_next_task = pick_next_task_dl,
				1731	.put_prev_task = put_prev_task_dl,
				1732
				1733	#ifdef CONFIG_SMP
				1734	.select_task_rq = select_task_rq_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1735	.set_cpus_allowed = set_cpus_allowed_dl,
				1736	.rq_online = rq_online_dl,
				1737	.rq_offline = rq_offline_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1738	.post_schedule = post_schedule_dl,
				1739	.task_woken = task_woken_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1740	#endif
				1741
				1742	.set_curr_task = set_curr_task_dl,
				1743	.task_tick = task_tick_dl,
				1744	.task_fork = task_fork_dl,
				1745	.task_dead = task_dead_dl,
				1746
				1747	.prio_changed = prio_changed_dl,
				1748	.switched_from = switched_from_dl,
				1749	.switched_to = switched_to_dl,
Stanislaw Gruszka	6e99891	2014-11-12 16:58:44 +0100	[diff] [blame]	1750
				1751	.update_curr = update_curr_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1752	};
Wanpeng Li	acb3213	2014-10-31 06:39:33 +0800	[diff] [blame]	1753
				1754	#ifdef CONFIG_SCHED_DEBUG
				1755	extern void print_dl_rq(struct seq_file m, int cpu, struct dl_rq dl_rq);
				1756
				1757	void print_dl_stats(struct seq_file *m, int cpu)
				1758	{
				1759	print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
				1760	}
				1761	#endif /* CONFIG_SCHED_DEBUG */