/*
 * kernel/workqueue.c - generic async execution with shared worker pool
 *
 * Copyright (C) 2002		Ingo Molnar
 *
 *   Derived from the taskqueue/keventd code by:
 *     David Woodhouse <dwmw2@infradead.org>
 *     Andrew Morton
 *     Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *     Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This is the generic async execution mechanism.  Work items are
 * executed in process context.  The worker pool is shared and
 * automatically managed.  There are two worker pools for each CPU (one for
 * normal work items and the other for high priority ones) and some extra
 * pools for workqueues which are not bound to any specific CPU - the
 * number of these backing pools is dynamic.
 *
 * Please read Documentation/core-api/workqueue.rst for details.
 */
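
/*
 * Illustrative sketch of typical usage (example_fn and example_work are
 * hypothetical names, not used anywhere below):
 *
 *	static void example_fn(struct work_struct *work);
 *	static DECLARE_WORK(example_work, example_fn);
 *	...
 *	schedule_work(&example_work);
 *
 * example_fn() then runs in process context on one of the shared worker
 * pools described above.
 */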

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
#include <linux/nmi.h>

#include "workqueue_internal.h"

enum {
	/*
	 * worker_pool flags
	 *
	 * A bound pool is either associated or disassociated with its CPU.
	 * While associated (!DISASSOCIATED), all workers are bound to the
	 * CPU and none has %WORKER_UNBOUND set and concurrency management
	 * is in effect.
	 *
	 * While DISASSOCIATED, the cpu may be offline and all workers have
	 * %WORKER_UNBOUND set and concurrency management disabled, and may
	 * be executing on any CPU.  The pool behaves as an unbound one.
	 *
	 * Note that DISASSOCIATED should be flipped only while holding
	 * wq_pool_attach_mutex to avoid changing binding state while
	 * worker_attach_to_pool() is in progress.
	 */
	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */

	/* worker flags */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER	= 6,		/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT	= HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breathe after fail */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give MIN_NICE.
	 */
	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};

/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Modifiable by initialization/destruction paths and read-only for
 *    everyone else.
 *
 * P: Preemption protected.  Disabling preemption is enough and should
 *    only be modified and accessed from the local cpu.
 *
 * L: pool->lock protected.  Access with pool->lock held.
 *
 * X: During normal operation, modification requires pool->lock and should
 *    be done only from local cpu.  Either disabling preemption on local
 *    cpu or grabbing pool->lock is enough for read access.  If
 *    POOL_DISASSOCIATED is set, it's identical to L.
 *
 * A: wq_pool_attach_mutex protected.
 *
 * PL: wq_pool_mutex protected.
 *
 * PR: wq_pool_mutex protected for writes.  RCU protected for reads.
 *
 * PW: wq_pool_mutex and wq->mutex protected for writes.  Either for reads.
 *
 * PWR: wq_pool_mutex and wq->mutex protected for writes.  Either or
 *      RCU for reads.
 *
 * WQ: wq->mutex protected.
 *
 * WR: wq->mutex protected for writes.  RCU protected for reads.
 *
 * MD: wq_mayday_lock protected.
 */
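
/*
 * For illustration only (hypothetical caller, not code from this file):
 * a field annotated "L:" such as pool->worklist is only touched with the
 * pool lock held, roughly
 *
 *	spin_lock_irq(&pool->lock);
 *	list_add_tail(&work->entry, &pool->worklist);
 *	spin_unlock_irq(&pool->lock);
 */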

/* struct worker is defined in workqueue_internal.h */

struct worker_pool {
	spinlock_t		lock;		/* the pool lock */
	int			cpu;		/* I: the associated cpu */
	int			node;		/* I: the associated node ID */
	int			id;		/* I: pool ID */
	unsigned int		flags;		/* X: flags */

	unsigned long		watchdog_ts;	/* L: watchdog timestamp */

	struct list_head	worklist;	/* L: list of pending works */

	int			nr_workers;	/* L: total number of workers */
	int			nr_idle;	/* L: currently idle workers */

	struct list_head	idle_list;	/* X: list of idle workers */
	struct timer_list	idle_timer;	/* L: worker idle timeout */
	struct timer_list	mayday_timer;	/* L: SOS timer for workers */

	/* a worker is either on busy_hash or idle_list, or the manager */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
						/* L: hash of busy workers */

	struct worker		*manager;	/* L: purely informational */
	struct list_head	workers;	/* A: attached workers */
	struct completion	*detach_completion; /* all workers detached */

	struct ida		worker_ida;	/* worker IDs for task name */

	struct workqueue_attrs	*attrs;		/* I: worker attributes */
	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
	int			refcnt;		/* PL: refcnt for unbound pools */

	/*
	 * The current concurrency level.  As it's likely to be accessed
	 * from other CPUs during try_to_wake_up(), put it in a separate
	 * cacheline.
	 */
	atomic_t		nr_running ____cacheline_aligned_in_smp;

	/*
	 * Destruction of pool is RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

/*
 * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
 * of work_struct->data are used for flags and the remaining high bits
 * point to the pwq; thus, pwqs need to be aligned at two's power of the
 * number of flag bits.
 */
struct pool_workqueue {
	struct worker_pool	*pool;		/* I: the associated pool */
	struct workqueue_struct *wq;		/* I: the owning workqueue */
	int			work_color;	/* L: current color */
	int			flush_color;	/* L: flushing color */
	int			refcnt;		/* L: reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */
	int			nr_active;	/* L: nr of active works */
	int			max_active;	/* L: max active works */
	struct list_head	delayed_works;	/* L: delayed works */
	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
	struct list_head	mayday_node;	/* MD: node on wq->maydays */

	/*
	 * Release of unbound pwq is punted to system_wq.  See put_pwq()
	 * and pwq_unbound_release_workfn() for details.  pool_workqueue
	 * itself is also RCU protected so that the first pwq can be
	 * determined without grabbing wq->mutex.
	 */
	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;		/* WQ: list of flushers */
	int			flush_color;	/* WQ: flush color waiting for */
	struct completion	done;		/* flush completion */
};

struct wq_device;

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pool through its pool_workqueues.
 */
struct workqueue_struct {
	struct list_head	pwqs;		/* WR: all pwqs of this wq */
	struct list_head	list;		/* PR: list of all workqueues */

	struct mutex		mutex;		/* protects this wq */
	int			work_color;	/* WQ: current work color */
	int			flush_color;	/* WQ: current flush color */
	atomic_t		nr_pwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* WQ: first flusher */
	struct list_head	flusher_queue;	/* WQ: flush waiters */
	struct list_head	flusher_overflow; /* WQ: flush overflow list */

	struct list_head	maydays;	/* MD: pwqs requesting rescue */
	struct worker		*rescuer;	/* I: rescue worker */

	int			nr_drainers;	/* WQ: drain in progress */
	int			saved_max_active; /* WQ: saved pwq max_active */

	struct workqueue_attrs	*unbound_attrs;	/* PW: only for unbound wqs */
	struct pool_workqueue	*dfl_pwq;	/* PW: only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;	/* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	char			*lock_name;
	struct lock_class_key	key;
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN]; /* I: workqueue name */

	/*
	 * Destruction of workqueue_struct is RCU protected to allow walking
	 * the workqueues list without grabbing wq_pool_mutex.
	 * This is used to dump all workqueues from sysrq.
	 */
	struct rcu_head		rcu;

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;
					/* possible CPUs of each node */

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;			/* can kworkers be created yet? */

static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */

/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */

static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
static bool workqueue_freezing;		/* PL: have wqs started freezing? */

/* PL: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on it.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* PR: idr of all pools */

/* PL: hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex(wq)					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			 !lockdep_is_held(&wq->mutex),			\
			 "RCU or wq->mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			 !lockdep_is_held(&wq->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or RCU read
 * locked.  If the pool needs to be used beyond the locking in effect, the
 * caller is responsible for guaranteeing that the pool stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with wq_pool_attach_mutex held.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
		else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or RCU read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
		else

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

static void *work_debug_hint(void *addr)
{
	return ((struct work_struct *) addr)->func;
}

static bool work_is_static_object(void *addr)
{
	struct work_struct *work = addr;

	return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.debug_hint	= work_debug_hint,
	.is_static_object = work_is_static_object,
	.fixup_init	= work_fixup_init,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

void destroy_delayed_work_on_stack(struct delayed_work *work)
{
	destroy_timer_on_stack(&work->timer);
	debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/**
 * worker_pool_assign_id - allocate ID and assign it to @pool
 * @pool: the pool pointer of interest
 *
 * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
 * successfully, -errno on failure.
 */
static int worker_pool_assign_id(struct worker_pool *pool)
{
	int ret;

	lockdep_assert_held(&wq_pool_mutex);

	ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
			GFP_KERNEL);
	if (ret >= 0) {
		pool->id = ret;
		return 0;
	}
	return ret;
}

/**
 * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
 * @wq: the target workqueue
 * @node: the node ID
 *
 * This must be called with any of wq_pool_mutex, wq->mutex or RCU
 * read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 *
 * Return: The unbound pool_workqueue for @node.
 */
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
						  int node)
{
	assert_rcu_or_wq_mutex_or_pool_mutex(wq);

	/*
	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
	 * delayed item is pending.  The plan is to keep CPU -> NODE
	 * mapping valid and stable across CPU on/offlines.  Once that
	 * happens, this workaround can be removed.
	 */
	if (unlikely(node == NUMA_NO_NODE))
		return wq->dfl_pwq;

	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}

static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}

/*
 * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
 * contain the pointer to the queued pwq.  Once execution starts, the flag
 * is cleared and the high bits contain OFFQ flags and pool ID.
 *
 * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
 * and clear_work_data() can be used to set the pwq, pool or clear
 * work->data.  These functions should only be called while the work is
 * owned - ie. while the PENDING bit is set.
 *
 * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
 * corresponding to a work.  Pool is available once the work has been
 * queued anywhere after initialization until it is sync canceled.  pwq is
 * available only while the work item is queued.
 *
 * %WORK_OFFQ_CANCELING is used to mark a work item which is being
 * canceled.  While being canceled, a work item may have its PENDING set
 * but stay off timer and worklist for arbitrarily long and nobody should
 * try to steal the PENDING bit.
 */
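/*
 * Rough sketch of the two encodings described above (exact bit positions
 * depend on the definitions in include/linux/workqueue.h):
 *
 *	queued:    data = pwq pointer | WORK_STRUCT_PWQ | WORK_STRUCT_PENDING | ...
 *	off queue: data = pool ID << WORK_OFFQ_POOL_SHIFT | OFFQ flags
 *
 * The helpers below, e.g. set_work_pwq() and
 * set_work_pool_and_clear_pending(), switch between these two forms.
 */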
static inline void set_work_data(struct work_struct *work, unsigned long data,
				 unsigned long flags)
{
	WARN_ON_ONCE(!work_pending(work));
	atomic_long_set(&work->data, data | flags | work_static(work));
}

static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
			 unsigned long extra_flags)
{
	set_work_data(work, (unsigned long)pwq,
		      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
}

static void set_work_pool_and_keep_pending(struct work_struct *work,
					   int pool_id)
{
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
		      WORK_STRUCT_PENDING);
}

static void set_work_pool_and_clear_pending(struct work_struct *work,
					    int pool_id)
{
	/*
	 * The following wmb is paired with the implied mb in
	 * test_and_set_bit(PENDING) and ensures all updates to @work made
	 * here are visible to and precede any updates by the next PENDING
	 * owner.
	 */
	smp_wmb();
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
	/*
	 * The following mb guarantees that previous clear of a PENDING bit
	 * will not be reordered with any speculative LOADS or STORES from
	 * work->current_func, which is executed afterwards.  This possible
	 * reordering can lead to a missed execution on attempt to queue
	 * the same @work.  E.g. consider this case:
	 *
	 *   CPU#0                         CPU#1
	 *   ----------------------------  --------------------------------
	 *
	 * 1  STORE event_indicated
	 * 2  queue_work_on() {
	 * 3    test_and_set_bit(PENDING)
	 * 4 }                             set_..._and_clear_pending() {
	 * 5                                 set_work_data() # clear bit
	 * 6                                 smp_mb()
	 * 7                               work->current_func() {
	 * 8                                 LOAD event_indicated
	 *                                 }
	 *
	 * Without an explicit full barrier speculative LOAD on line 8 can
	 * be executed before CPU#0 does STORE on line 1.  If that happens,
	 * CPU#0 observes the PENDING bit is still set and new execution of
	 * a @work is not queued in a hope, that CPU#1 will eventually
	 * finish the queued @work.  Meanwhile CPU#1 does not see
	 * event_indicated is set, because speculative LOAD was executed
	 * before actual STORE.
	 */
	smp_mb();
}

static void clear_work_data(struct work_struct *work)
{
	smp_wmb();	/* see set_work_pool_and_clear_pending() */
	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}

static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
	else
		return NULL;
}

/**
 * get_work_pool - return the worker_pool a given work was associated with
 * @work: the work item of interest
 *
 * Pools are created and destroyed under wq_pool_mutex, and read access is
 * allowed under RCU read lock.  As such, this function should be
 * called under wq_pool_mutex or inside of a rcu_read_lock() region.
 *
 * All fields of the returned pool are accessible as long as the above
 * mentioned locking is in effect.  If the returned pool needs to be used
 * beyond the critical section, the caller is responsible for ensuring the
 * returned pool is and stays online.
 *
 * Return: The worker_pool @work was last associated with.  %NULL if none.
 */
static struct worker_pool *get_work_pool(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);
	int pool_id;

	assert_rcu_or_pool_mutex();

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;

	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
	if (pool_id == WORK_OFFQ_POOL_NONE)
		return NULL;

	return idr_find(&worker_pool_idr, pool_id);
}

/**
 * get_work_pool_id - return the worker pool ID a given work is associated with
 * @work: the work item of interest
 *
 * Return: The worker_pool ID @work was last associated with.
 * %WORK_OFFQ_POOL_NONE if none.
 */
static int get_work_pool_id(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;

	return data >> WORK_OFFQ_POOL_SHIFT;
}

static void mark_work_canceling(struct work_struct *work)
{
	unsigned long pool_id = get_work_pool_id(work);

	pool_id <<= WORK_OFFQ_POOL_SHIFT;
	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}

static bool work_is_canceling(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}

/*
 * Policy functions.  These define the policies on how the global worker
 * pools are managed.  Unless noted otherwise, these functions assume that
 * they're being called with pool->lock held.
 */

static bool __need_more_worker(struct worker_pool *pool)
{
	return !atomic_read(&pool->nr_running);
}

/*
 * Need to wake up a worker?  Called from anything but currently
 * running workers.
 *
 * Note that, because unbound workers never contribute to nr_running, this
 * function will always return %true for unbound pools as long as the
 * worklist isn't empty.
 */
static bool need_more_worker(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) && __need_more_worker(pool);
}

/* Can I start working?  Called from busy but !running workers. */
static bool may_start_working(struct worker_pool *pool)
{
	return pool->nr_idle;
}

/* Do I need to keep working?  Called from currently running workers. */
static bool keep_working(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) &&
		atomic_read(&pool->nr_running) <= 1;
}

/* Do we need a new worker?  Called from manager. */
static bool need_to_create_worker(struct worker_pool *pool)
{
	return need_more_worker(pool) && !may_start_working(pool);
}

/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
	int nr_busy = pool->nr_workers - nr_idle;

	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}

/*
 * Wake up functions.
 */

/* Return the first idle worker.  Safe with preemption disabled */
static struct worker *first_idle_worker(struct worker_pool *pool)
{
	if (unlikely(list_empty(&pool->idle_list)))
		return NULL;

	return list_first_entry(&pool->idle_list, struct worker, entry);
}

/**
 * wake_up_worker - wake up an idle worker
 * @pool: worker pool to wake worker from
 *
 * Wake up the first idle worker of @pool.
 *
 * CONTEXT:
 * spin_lock_irq(pool->lock).
 */
static void wake_up_worker(struct worker_pool *pool)
{
	struct worker *worker = first_idle_worker(pool);

	if (likely(worker))
		wake_up_process(worker->task);
}

/**
 * wq_worker_waking_up - a worker is waking up
 * @task: task waking up
 * @cpu: CPU @task is waking up to
 *
 * This function is called during try_to_wake_up() when a worker is
 * being awoken.
 *
 * CONTEXT:
 * spin_lock_irq(rq->lock)
 */
void wq_worker_waking_up(struct task_struct *task, int cpu)
{
	struct worker *worker = kthread_data(task);

	if (!(worker->flags & WORKER_NOT_RUNNING)) {
		WARN_ON_ONCE(worker->pool->cpu != cpu);
		atomic_inc(&worker->pool->nr_running);
	}
}

/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * This function is called during schedule() when a busy worker is
 * going to sleep.  A worker on the same cpu can be woken up by
 * returning a pointer to its task.
 *
 * CONTEXT:
 * spin_lock_irq(rq->lock)
 *
 * Return:
 * Worker task on @cpu to wake up, %NULL if none.
 */
struct task_struct *wq_worker_sleeping(struct task_struct *task)
{
	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
	struct worker_pool *pool;

	/*
	 * Rescuers, which may not have all the fields set up like normal
	 * workers, also reach here, let's not access anything before
	 * checking NOT_RUNNING.
	 */
	if (worker->flags & WORKER_NOT_RUNNING)
		return NULL;

	pool = worker->pool;

	/* this can only happen on the local cpu */
	if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
		return NULL;

	/*
	 * The counterpart of the following dec_and_test, implied mb,
	 * worklist not empty test sequence is in insert_work().
	 * Please read comment there.
	 *
	 * NOT_RUNNING is clear.  This means that we're bound to and
	 * running on the local cpu w/ rq lock held and preemption
	 * disabled, which in turn means that no one else could be
	 * manipulating idle_list, so dereferencing idle_list without pool
	 * lock is safe.
	 */
	if (atomic_dec_and_test(&pool->nr_running) &&
	    !list_empty(&pool->worklist))
		to_wakeup = first_idle_worker(pool);
	return to_wakeup ? to_wakeup->task : NULL;
}

/**
 * wq_worker_last_func - retrieve worker's last work function
 * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed.  This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
 * spin_lock_irq(rq->lock)
 *
 * This function is called during schedule() when a kworker is going
 * to sleep.  It's used by psi to identify aggregation workers during
 * dequeuing, to allow periodic aggregation to shut-off when that
 * worker is the last task in the system or cgroup to go to sleep.
 *
 * As this function doesn't involve any workqueue-related locking, it
 * only returns stable values when called from inside the scheduler's
 * queuing and dequeuing paths, when @task, which must be a kworker,
 * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
 * hasn't executed any work yet.
 */
work_func_t wq_worker_last_func(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);

	return worker->last_func;
}

/**
 * worker_set_flags - set worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to set
 *
 * Set @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * spin_lock_irq(pool->lock)
 */
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;

	WARN_ON_ONCE(worker->task != current);

	/* If transitioning into NOT_RUNNING, adjust nr_running. */
	if ((flags & WORKER_NOT_RUNNING) &&
	    !(worker->flags & WORKER_NOT_RUNNING)) {
		atomic_dec(&pool->nr_running);
	}

	worker->flags |= flags;
}

/**
 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to clear
 *
 * Clear @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * spin_lock_irq(pool->lock)
 */
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;
	unsigned int oflags = worker->flags;

	WARN_ON_ONCE(worker->task != current);

	worker->flags &= ~flags;

	/*
	 * If transitioning out of NOT_RUNNING, increment nr_running.  Note
	 * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is a mask
	 * of multiple flags, not a single flag.
	 */
	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
		if (!(worker->flags & WORKER_NOT_RUNNING))
			atomic_inc(&pool->nr_running);
}

999/**
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001000 * find_worker_executing_work - find worker which is executing a work
Tejun Heoc9e7cf22013-01-24 11:01:33 -08001001 * @pool: pool of interest
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001002 * @work: work to find worker for
1003 *
Tejun Heoc9e7cf22013-01-24 11:01:33 -08001004 * Find a worker which is executing @work on @pool by searching
1005 * @pool->busy_hash which is keyed by the address of @work. For a worker
Tejun Heoa2c1c572012-12-18 10:35:02 -08001006 * to match, its current execution should match the address of @work and
1007 * its work function. This is to avoid unwanted dependency between
1008 * unrelated work executions through a work item being recycled while still
1009 * being executed.
1010 *
1011 * This is a bit tricky. A work item may be freed once its execution
1012 * starts and nothing prevents the freed area from being recycled for
1013 * another work item. If the same work item address ends up being reused
1014 * before the original execution finishes, workqueue will identify the
1015 * recycled work item as currently executing and make it wait until the
1016 * current execution finishes, introducing an unwanted dependency.
1017 *
Tejun Heoc5aa87b2013-03-13 16:51:36 -07001018 * This function checks the work item address and work function to avoid
1019 * false positives. Note that this isn't complete as one may construct a
1020 * work function which can introduce dependency onto itself through a
1021 * recycled work item. Well, if somebody wants to shoot oneself in the
1022 * foot that badly, there's only so much we can do, and if such deadlock
1023 * actually occurs, it should be easy to locate the culprit work function.
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001024 *
1025 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08001026 * spin_lock_irq(pool->lock).
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001027 *
Yacine Belkadid185af32013-07-31 14:59:24 -07001028 * Return:
1029 * Pointer to worker which is executing @work if found, %NULL
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001030 * otherwise.
1031 */
Tejun Heoc9e7cf22013-01-24 11:01:33 -08001032static struct worker *find_worker_executing_work(struct worker_pool *pool,
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001033 struct work_struct *work)
1034{
Sasha Levin42f85702012-12-17 10:01:23 -05001035 struct worker *worker;
Sasha Levin42f85702012-12-17 10:01:23 -05001036
Sasha Levinb67bfe02013-02-27 17:06:00 -08001037 hash_for_each_possible(pool->busy_hash, worker, hentry,
Tejun Heoa2c1c572012-12-18 10:35:02 -08001038 (unsigned long)work)
1039 if (worker->current_work == work &&
1040 worker->current_func == work->func)
Sasha Levin42f85702012-12-17 10:01:23 -05001041 return worker;
1042
1043 return NULL;
Tejun Heo8cca0ee2010-06-29 10:07:13 +02001044}
1045
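/*
 * Illustrative sketch (not part of workqueue.c): a self-freeing work item,
 * the situation the address+function check above defends against.  Once the
 * callback starts, the containing object may be freed and the same address
 * can be recycled for an unrelated work item while the callback is still
 * running.  All example_* names below are hypothetical.
 */
struct example_ctx {
	struct work_struct work;
	int payload;
};

static void example_selffree_fn(struct work_struct *work)
{
	struct example_ctx *ctx = container_of(work, struct example_ctx, work);

	pr_info("example payload %d\n", ctx->payload);
	kfree(ctx);	/* @work's address may be recycled from here on */
}
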
1046/**
Tejun Heobf4ede02012-08-03 10:30:46 -07001047 * move_linked_works - move linked works to a list
1048 * @work: start of series of works to be scheduled
1049 * @head: target list to append @work to
Shailendra Verma402dd892015-05-23 10:38:14 +05301050 * @nextp: out parameter for nested worklist walking
Tejun Heobf4ede02012-08-03 10:30:46 -07001051 *
1052 * Schedule linked works starting from @work to @head. Work series to
1053 * be scheduled starts at @work and includes any consecutive work with
1054 * WORK_STRUCT_LINKED set in its predecessor.
1055 *
1056 * If @nextp is not NULL, it's updated to point to the next work of
1057 * the last scheduled work. This allows move_linked_works() to be
1058 * nested inside outer list_for_each_entry_safe().
1059 *
1060 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08001061 * spin_lock_irq(pool->lock).
Tejun Heobf4ede02012-08-03 10:30:46 -07001062 */
1063static void move_linked_works(struct work_struct *work, struct list_head *head,
1064 struct work_struct **nextp)
1065{
1066 struct work_struct *n;
1067
1068 /*
1069	 * The linked worklist will always end before the end of the list;
1070	 * use NULL for the list head.
1071 */
1072 list_for_each_entry_safe_from(work, n, NULL, entry) {
1073 list_move_tail(&work->entry, head);
1074 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1075 break;
1076 }
1077
1078 /*
1079 * If we're already inside safe list traversal and have moved
1080 * multiple works to the scheduled queue, the next position
1081 * needs to be updated.
1082 */
1083 if (nextp)
1084 *nextp = n;
1085}
1086
Tejun Heo8864b4e2013-03-12 11:30:04 -07001087/**
1088 * get_pwq - get an extra reference on the specified pool_workqueue
1089 * @pwq: pool_workqueue to get
1090 *
1091 * Obtain an extra reference on @pwq. The caller should guarantee that
1092 * @pwq has positive refcnt and be holding the matching pool->lock.
1093 */
1094static void get_pwq(struct pool_workqueue *pwq)
1095{
1096 lockdep_assert_held(&pwq->pool->lock);
1097 WARN_ON_ONCE(pwq->refcnt <= 0);
1098 pwq->refcnt++;
1099}
1100
1101/**
1102 * put_pwq - put a pool_workqueue reference
1103 * @pwq: pool_workqueue to put
1104 *
1105 * Drop a reference of @pwq. If its refcnt reaches zero, schedule its
1106 * destruction. The caller should be holding the matching pool->lock.
1107 */
1108static void put_pwq(struct pool_workqueue *pwq)
1109{
1110 lockdep_assert_held(&pwq->pool->lock);
1111 if (likely(--pwq->refcnt))
1112 return;
1113 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1114 return;
1115 /*
1116 * @pwq can't be released under pool->lock, bounce to
1117 * pwq_unbound_release_workfn(). This never recurses on the same
1118 * pool->lock as this path is taken only for unbound workqueues and
1119 * the release work item is scheduled on a per-cpu workqueue. To
1120 * avoid lockdep warning, unbound pool->locks are given lockdep
1121 * subclass of 1 in get_unbound_pool().
1122 */
1123 schedule_work(&pwq->unbound_release_work);
1124}
1125
Tejun Heodce90d42013-04-01 11:23:35 -07001126/**
1127 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
1128 * @pwq: pool_workqueue to put (can be %NULL)
1129 *
1130 * put_pwq() with locking. This function also allows %NULL @pwq.
1131 */
1132static void put_pwq_unlocked(struct pool_workqueue *pwq)
1133{
1134 if (pwq) {
1135 /*
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001136 * As both pwqs and pools are RCU protected, the
Tejun Heodce90d42013-04-01 11:23:35 -07001137 * following lock operations are safe.
1138 */
1139 spin_lock_irq(&pwq->pool->lock);
1140 put_pwq(pwq);
1141 spin_unlock_irq(&pwq->pool->lock);
1142 }
1143}
1144
Tejun Heo112202d2013-02-13 19:29:12 -08001145static void pwq_activate_delayed_work(struct work_struct *work)
Tejun Heobf4ede02012-08-03 10:30:46 -07001146{
Tejun Heo112202d2013-02-13 19:29:12 -08001147 struct pool_workqueue *pwq = get_work_pwq(work);
Tejun Heobf4ede02012-08-03 10:30:46 -07001148
1149 trace_workqueue_activate_work(work);
Tejun Heo82607adc2015-12-08 11:28:04 -05001150 if (list_empty(&pwq->pool->worklist))
1151 pwq->pool->watchdog_ts = jiffies;
Tejun Heo112202d2013-02-13 19:29:12 -08001152 move_linked_works(work, &pwq->pool->worklist, NULL);
Tejun Heobf4ede02012-08-03 10:30:46 -07001153 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
Tejun Heo112202d2013-02-13 19:29:12 -08001154 pwq->nr_active++;
Tejun Heobf4ede02012-08-03 10:30:46 -07001155}
1156
Tejun Heo112202d2013-02-13 19:29:12 -08001157static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
Lai Jiangshan3aa62492012-09-18 10:40:00 -07001158{
Tejun Heo112202d2013-02-13 19:29:12 -08001159 struct work_struct *work = list_first_entry(&pwq->delayed_works,
Lai Jiangshan3aa62492012-09-18 10:40:00 -07001160 struct work_struct, entry);
1161
Tejun Heo112202d2013-02-13 19:29:12 -08001162 pwq_activate_delayed_work(work);
Lai Jiangshan3aa62492012-09-18 10:40:00 -07001163}
1164
Tejun Heobf4ede02012-08-03 10:30:46 -07001165/**
Tejun Heo112202d2013-02-13 19:29:12 -08001166 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1167 * @pwq: pwq of interest
Tejun Heobf4ede02012-08-03 10:30:46 -07001168 * @color: color of work which left the queue
Tejun Heobf4ede02012-08-03 10:30:46 -07001169 *
1170 * A work item has either completed or been removed from the pending queue;
Tejun Heo112202d2013-02-13 19:29:12 -08001171 * decrement nr_in_flight of its pwq and handle workqueue flushing.
Tejun Heobf4ede02012-08-03 10:30:46 -07001172 *
1173 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08001174 * spin_lock_irq(pool->lock).
Tejun Heobf4ede02012-08-03 10:30:46 -07001175 */
Tejun Heo112202d2013-02-13 19:29:12 -08001176static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
Tejun Heobf4ede02012-08-03 10:30:46 -07001177{
Tejun Heo8864b4e2013-03-12 11:30:04 -07001178 /* uncolored work items don't participate in flushing or nr_active */
Tejun Heobf4ede02012-08-03 10:30:46 -07001179 if (color == WORK_NO_COLOR)
Tejun Heo8864b4e2013-03-12 11:30:04 -07001180 goto out_put;
Tejun Heobf4ede02012-08-03 10:30:46 -07001181
Tejun Heo112202d2013-02-13 19:29:12 -08001182 pwq->nr_in_flight[color]--;
Tejun Heobf4ede02012-08-03 10:30:46 -07001183
Tejun Heo112202d2013-02-13 19:29:12 -08001184 pwq->nr_active--;
1185 if (!list_empty(&pwq->delayed_works)) {
Lai Jiangshanb3f9f402012-09-18 10:40:00 -07001186 /* one down, submit a delayed one */
Tejun Heo112202d2013-02-13 19:29:12 -08001187 if (pwq->nr_active < pwq->max_active)
1188 pwq_activate_first_delayed(pwq);
Tejun Heobf4ede02012-08-03 10:30:46 -07001189 }
1190
1191 /* is flush in progress and are we at the flushing tip? */
Tejun Heo112202d2013-02-13 19:29:12 -08001192 if (likely(pwq->flush_color != color))
Tejun Heo8864b4e2013-03-12 11:30:04 -07001193 goto out_put;
Tejun Heobf4ede02012-08-03 10:30:46 -07001194
1195 /* are there still in-flight works? */
Tejun Heo112202d2013-02-13 19:29:12 -08001196 if (pwq->nr_in_flight[color])
Tejun Heo8864b4e2013-03-12 11:30:04 -07001197 goto out_put;
Tejun Heobf4ede02012-08-03 10:30:46 -07001198
Tejun Heo112202d2013-02-13 19:29:12 -08001199 /* this pwq is done, clear flush_color */
1200 pwq->flush_color = -1;
Tejun Heobf4ede02012-08-03 10:30:46 -07001201
1202 /*
Tejun Heo112202d2013-02-13 19:29:12 -08001203 * If this was the last pwq, wake up the first flusher. It
Tejun Heobf4ede02012-08-03 10:30:46 -07001204 * will handle the rest.
1205 */
Tejun Heo112202d2013-02-13 19:29:12 -08001206 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1207 complete(&pwq->wq->first_flusher->done);
Tejun Heo8864b4e2013-03-12 11:30:04 -07001208out_put:
1209 put_pwq(pwq);
Tejun Heobf4ede02012-08-03 10:30:46 -07001210}
1211
Tejun Heo36e227d2012-08-03 10:30:46 -07001212/**
Tejun Heobbb68df2012-08-03 10:30:46 -07001213 * try_to_grab_pending - steal work item from worklist and disable irq
Tejun Heo36e227d2012-08-03 10:30:46 -07001214 * @work: work item to steal
1215 * @is_dwork: @work is a delayed_work
Tejun Heobbb68df2012-08-03 10:30:46 -07001216 * @flags: place to store irq state
Tejun Heo36e227d2012-08-03 10:30:46 -07001217 *
1218 * Try to grab PENDING bit of @work. This function can handle @work in any
Yacine Belkadid185af32013-07-31 14:59:24 -07001219 * stable state - idle, on timer or on worklist.
Tejun Heo36e227d2012-08-03 10:30:46 -07001220 *
Yacine Belkadid185af32013-07-31 14:59:24 -07001221 * Return:
Tejun Heo36e227d2012-08-03 10:30:46 -07001222 * 1 if @work was pending and we successfully stole PENDING
1223 * 0 if @work was idle and we claimed PENDING
1224 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
Tejun Heobbb68df2012-08-03 10:30:46 -07001225 * -ENOENT if someone else is canceling @work, this state may persist
1226 * for arbitrarily long
Tejun Heo36e227d2012-08-03 10:30:46 -07001227 *
Yacine Belkadid185af32013-07-31 14:59:24 -07001228 * Note:
Tejun Heobbb68df2012-08-03 10:30:46 -07001229 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
Tejun Heoe0aecdd2012-08-21 13:18:24 -07001230 * interrupted while holding PENDING and @work off queue, irq must be
1231 * disabled on entry. This, combined with delayed_work->timer being
1232 * irqsafe, ensures that we return -EAGAIN only for a finite, short period of time.
Tejun Heobbb68df2012-08-03 10:30:46 -07001233 *
1234 * On successful return, >= 0, irq is disabled and the caller is
1235 * responsible for releasing it using local_irq_restore(*@flags).
1236 *
Tejun Heoe0aecdd2012-08-21 13:18:24 -07001237 * This function is safe to call from any context including IRQ handler.
Tejun Heobf4ede02012-08-03 10:30:46 -07001238 */
Tejun Heobbb68df2012-08-03 10:30:46 -07001239static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1240 unsigned long *flags)
Tejun Heobf4ede02012-08-03 10:30:46 -07001241{
Tejun Heod565ed62013-01-24 11:01:33 -08001242 struct worker_pool *pool;
Tejun Heo112202d2013-02-13 19:29:12 -08001243 struct pool_workqueue *pwq;
Tejun Heobf4ede02012-08-03 10:30:46 -07001244
Tejun Heobbb68df2012-08-03 10:30:46 -07001245 local_irq_save(*flags);
1246
Tejun Heo36e227d2012-08-03 10:30:46 -07001247 /* try to steal the timer if it exists */
1248 if (is_dwork) {
1249 struct delayed_work *dwork = to_delayed_work(work);
1250
Tejun Heoe0aecdd2012-08-21 13:18:24 -07001251 /*
1252 * dwork->timer is irqsafe. If del_timer() fails, it's
1253 * guaranteed that the timer is not queued anywhere and not
1254 * running on the local CPU.
1255 */
Tejun Heo36e227d2012-08-03 10:30:46 -07001256 if (likely(del_timer(&dwork->timer)))
1257 return 1;
1258 }
1259
1260 /* try to claim PENDING the normal way */
Tejun Heobf4ede02012-08-03 10:30:46 -07001261 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1262 return 0;
1263
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001264 rcu_read_lock();
Tejun Heobf4ede02012-08-03 10:30:46 -07001265 /*
1266 * The queueing is in progress, or it is already queued. Try to
1267 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1268 */
Tejun Heod565ed62013-01-24 11:01:33 -08001269 pool = get_work_pool(work);
1270 if (!pool)
Tejun Heobbb68df2012-08-03 10:30:46 -07001271 goto fail;
Tejun Heobf4ede02012-08-03 10:30:46 -07001272
Tejun Heod565ed62013-01-24 11:01:33 -08001273 spin_lock(&pool->lock);
Lai Jiangshan0b3dae62013-02-06 18:04:53 -08001274 /*
Tejun Heo112202d2013-02-13 19:29:12 -08001275 * work->data is guaranteed to point to pwq only while the work
1276 * item is queued on pwq->wq, and both updating work->data to point
1277 * to pwq on queueing and to pool on dequeueing are done under
1278 * pwq->pool->lock. This in turn guarantees that, if work->data
1279 * points to pwq which is associated with a locked pool, the work
Lai Jiangshan0b3dae62013-02-06 18:04:53 -08001280 * item is currently queued on that pool.
1281 */
Tejun Heo112202d2013-02-13 19:29:12 -08001282 pwq = get_work_pwq(work);
1283 if (pwq && pwq->pool == pool) {
Tejun Heo16062832013-02-06 18:04:53 -08001284 debug_work_deactivate(work);
Lai Jiangshan3aa62492012-09-18 10:40:00 -07001285
Tejun Heo16062832013-02-06 18:04:53 -08001286 /*
1287 * A delayed work item cannot be grabbed directly because
1288 * it might have linked NO_COLOR work items which, if left
Tejun Heo112202d2013-02-13 19:29:12 -08001289 * on the delayed_list, will confuse pwq->nr_active
Tejun Heo16062832013-02-06 18:04:53 -08001290	 * management later on and cause a stall. Make sure the work
1291 * item is activated before grabbing.
1292 */
1293 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
Tejun Heo112202d2013-02-13 19:29:12 -08001294 pwq_activate_delayed_work(work);
Lai Jiangshan3aa62492012-09-18 10:40:00 -07001295
Tejun Heo16062832013-02-06 18:04:53 -08001296 list_del_init(&work->entry);
Lai Jiangshan9c34a702014-07-11 00:11:13 +08001297 pwq_dec_nr_in_flight(pwq, get_work_color(work));
Tejun Heo36e227d2012-08-03 10:30:46 -07001298
Tejun Heo112202d2013-02-13 19:29:12 -08001299 /* work->data points to pwq iff queued, point to pool */
Tejun Heo16062832013-02-06 18:04:53 -08001300 set_work_pool_and_keep_pending(work, pool->id);
Lai Jiangshan4468a002013-02-06 18:04:53 -08001301
Tejun Heo16062832013-02-06 18:04:53 -08001302 spin_unlock(&pool->lock);
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001303 rcu_read_unlock();
Tejun Heo16062832013-02-06 18:04:53 -08001304 return 1;
Tejun Heobf4ede02012-08-03 10:30:46 -07001305 }
Tejun Heod565ed62013-01-24 11:01:33 -08001306 spin_unlock(&pool->lock);
Tejun Heobbb68df2012-08-03 10:30:46 -07001307fail:
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001308 rcu_read_unlock();
Tejun Heobbb68df2012-08-03 10:30:46 -07001309 local_irq_restore(*flags);
1310 if (work_is_canceling(work))
1311 return -ENOENT;
1312 cpu_relax();
Tejun Heo36e227d2012-08-03 10:30:46 -07001313 return -EAGAIN;
Tejun Heobf4ede02012-08-03 10:30:46 -07001314}
1315
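/*
 * Illustrative sketch (not part of workqueue.c): the typical caller pattern
 * for try_to_grab_pending(), loosely mirroring mod_delayed_work_on() and the
 * cancel paths later in this file - busy-retry on -EAGAIN, bail out on
 * -ENOENT, otherwise act on the grabbed item and re-enable irqs.  The
 * example_* name is hypothetical.
 */
static bool example_cancel_pending(struct work_struct *work, bool is_dwork)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (ret < 0)		/* -ENOENT: someone else is canceling */
		return false;

	/* we own PENDING with irqs off; drop it while recording the last pool */
	set_work_pool_and_clear_pending(work, get_work_pool_id(work));
	local_irq_restore(flags);
	return ret;		/* %true iff @work was pending */
}
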
1316/**
Tejun Heo706026c2013-01-24 11:01:34 -08001317 * insert_work - insert a work into a pool
Tejun Heo112202d2013-02-13 19:29:12 -08001318 * @pwq: pwq @work belongs to
Tejun Heo4690c4a2010-06-29 10:07:10 +02001319 * @work: work to insert
1320 * @head: insertion point
1321 * @extra_flags: extra WORK_STRUCT_* flags to set
1322 *
Tejun Heo112202d2013-02-13 19:29:12 -08001323 * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to
Tejun Heo706026c2013-01-24 11:01:34 -08001324 * work_struct flags.
Tejun Heo4690c4a2010-06-29 10:07:10 +02001325 *
1326 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08001327 * spin_lock_irq(pool->lock).
Tejun Heo4690c4a2010-06-29 10:07:10 +02001328 */
Tejun Heo112202d2013-02-13 19:29:12 -08001329static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1330 struct list_head *head, unsigned int extra_flags)
Oleg Nesterovb89deed2007-05-09 02:33:52 -07001331{
Tejun Heo112202d2013-02-13 19:29:12 -08001332 struct worker_pool *pool = pwq->pool;
Frederic Weisbeckere1d8aa92009-01-12 23:15:46 +01001333
Tejun Heo4690c4a2010-06-29 10:07:10 +02001334 /* we own @work, set data and link */
Tejun Heo112202d2013-02-13 19:29:12 -08001335 set_work_pwq(work, pwq, extra_flags);
Oleg Nesterov1a4d9b02008-07-25 01:47:47 -07001336 list_add_tail(&work->entry, head);
Tejun Heo8864b4e2013-03-12 11:30:04 -07001337 get_pwq(pwq);
Tejun Heoe22bee72010-06-29 10:07:14 +02001338
1339 /*
Tejun Heoc5aa87b2013-03-13 16:51:36 -07001340 * Ensure either wq_worker_sleeping() sees the above
1341 * list_add_tail() or we see zero nr_running to avoid workers lying
1342 * around lazily while there are works to be processed.
Tejun Heoe22bee72010-06-29 10:07:14 +02001343 */
1344 smp_mb();
1345
Tejun Heo63d95a92012-07-12 14:46:37 -07001346 if (__need_more_worker(pool))
1347 wake_up_worker(pool);
Oleg Nesterovb89deed2007-05-09 02:33:52 -07001348}
1349
Tejun Heoc8efcc22010-12-20 19:32:04 +01001350/*
1351 * Test whether @work is being queued from another work executing on the
Tejun Heo8d03ecf2013-02-13 19:29:10 -08001352 * same workqueue.
Tejun Heoc8efcc22010-12-20 19:32:04 +01001353 */
1354static bool is_chained_work(struct workqueue_struct *wq)
1355{
Tejun Heo8d03ecf2013-02-13 19:29:10 -08001356 struct worker *worker;
Tejun Heoc8efcc22010-12-20 19:32:04 +01001357
Tejun Heo8d03ecf2013-02-13 19:29:10 -08001358 worker = current_wq_worker();
1359 /*
Bart Van Asschebf393fd2019-03-01 13:57:25 -08001360 * Return %true iff I'm a worker executing a work item on @wq. If
Tejun Heo8d03ecf2013-02-13 19:29:10 -08001361 * I'm @worker, it's safe to dereference it without locking.
1362 */
Tejun Heo112202d2013-02-13 19:29:12 -08001363 return worker && worker->current_pwq->wq == wq;
Tejun Heoc8efcc22010-12-20 19:32:04 +01001364}
1365
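/*
 * Illustrative sketch (not part of workqueue.c): "chained" queueing - a work
 * item requeueing work on the workqueue it is currently running on, which is
 * exactly what the __WQ_DRAINING check in __queue_work() below still permits
 * while drain_workqueue() is in progress.  The example_* names are
 * hypothetical and the item is assumed to run on system_wq.
 */
static void example_chained_fn(struct work_struct *work)
{
	static atomic_t example_rounds = ATOMIC_INIT(3);

	/* requeue ourselves a few more times on the same workqueue */
	if (atomic_dec_return(&example_rounds) > 0)
		queue_work(system_wq, work);
}
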
Mike Galbraithef5571802016-02-09 17:59:38 -05001366/*
1367 * When queueing an unbound work item to a wq, prefer local CPU if allowed
1368 * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
1369 * avoid perturbing sensitive tasks.
1370 */
1371static int wq_select_unbound_cpu(int cpu)
1372{
Tejun Heof303fccb2016-02-09 17:59:38 -05001373 static bool printed_dbg_warning;
Mike Galbraithef5571802016-02-09 17:59:38 -05001374 int new_cpu;
1375
Tejun Heof303fccb2016-02-09 17:59:38 -05001376 if (likely(!wq_debug_force_rr_cpu)) {
1377 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1378 return cpu;
1379 } else if (!printed_dbg_warning) {
1380 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1381 printed_dbg_warning = true;
1382 }
1383
Mike Galbraithef5571802016-02-09 17:59:38 -05001384 if (cpumask_empty(wq_unbound_cpumask))
1385 return cpu;
1386
1387 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1388 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1389 if (unlikely(new_cpu >= nr_cpu_ids)) {
1390 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1391 if (unlikely(new_cpu >= nr_cpu_ids))
1392 return cpu;
1393 }
1394 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1395
1396 return new_cpu;
1397}
1398
Tejun Heod84ff052013-03-12 11:29:59 -07001399static void __queue_work(int cpu, struct workqueue_struct *wq,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400 struct work_struct *work)
1401{
Tejun Heo112202d2013-02-13 19:29:12 -08001402 struct pool_workqueue *pwq;
Tejun Heoc9178082013-03-12 11:30:04 -07001403 struct worker_pool *last_pool;
Tejun Heo1e19ffc2010-06-29 10:07:12 +02001404 struct list_head *worklist;
Tejun Heo8a2e8e5d2010-08-25 10:33:56 +02001405 unsigned int work_flags;
Joonsoo Kimb75cac92012-08-15 23:25:37 +09001406 unsigned int req_cpu = cpu;
Tejun Heo8930cab2012-08-03 10:30:45 -07001407
1408 /*
1409 * While a work item is PENDING && off queue, a task trying to
1410 * steal the PENDING will busy-loop waiting for it to either get
1411 * queued or lose PENDING. Grabbing PENDING and queueing should
1412 * happen with IRQ disabled.
1413 */
Frederic Weisbecker8e8eb732017-11-06 16:01:19 +01001414 lockdep_assert_irqs_disabled();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
Thomas Gleixnerdc186ad2009-11-16 01:09:48 +09001416 debug_work_activate(work);
Tejun Heo1e19ffc2010-06-29 10:07:12 +02001417
Li Bin9ef28a72013-09-09 13:13:58 +08001418 /* if draining, only works from the same workqueue are allowed */
Tejun Heo618b01e2013-03-12 11:30:04 -07001419 if (unlikely(wq->flags & __WQ_DRAINING) &&
Tejun Heoc8efcc22010-12-20 19:32:04 +01001420 WARN_ON_ONCE(!is_chained_work(wq)))
Tejun Heoe41e7042010-08-24 14:22:47 +02001421 return;
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001422 rcu_read_lock();
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07001423retry:
Tejun Heodf2d5ae2013-04-01 11:23:35 -07001424 if (req_cpu == WORK_CPU_UNBOUND)
Mike Galbraithef5571802016-02-09 17:59:38 -05001425 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
Tejun Heodf2d5ae2013-04-01 11:23:35 -07001426
Tejun Heoc9178082013-03-12 11:30:04 -07001427 /* pwq which will be used unless @work is executing elsewhere */
Tejun Heodf2d5ae2013-04-01 11:23:35 -07001428 if (!(wq->flags & WQ_UNBOUND))
Tejun Heo7fb98ea2013-03-12 11:30:00 -07001429 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
Tejun Heodf2d5ae2013-04-01 11:23:35 -07001430 else
1431 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
Tejun Heodbf25762012-08-20 14:51:23 -07001432
Tejun Heoc9178082013-03-12 11:30:04 -07001433 /*
1434 * If @work was previously on a different pool, it might still be
1435 * running there, in which case the work needs to be queued on that
1436 * pool to guarantee non-reentrancy.
1437 */
1438 last_pool = get_work_pool(work);
1439 if (last_pool && last_pool != pwq->pool) {
1440 struct worker *worker;
Tejun Heo18aa9ef2010-06-29 10:07:13 +02001441
Tejun Heoc9178082013-03-12 11:30:04 -07001442 spin_lock(&last_pool->lock);
Tejun Heo18aa9ef2010-06-29 10:07:13 +02001443
Tejun Heoc9178082013-03-12 11:30:04 -07001444 worker = find_worker_executing_work(last_pool, work);
Tejun Heo18aa9ef2010-06-29 10:07:13 +02001445
Tejun Heoc9178082013-03-12 11:30:04 -07001446 if (worker && worker->current_pwq->wq == wq) {
1447 pwq = worker->current_pwq;
Tejun Heo8930cab2012-08-03 10:30:45 -07001448 } else {
Tejun Heoc9178082013-03-12 11:30:04 -07001449 /* meh... not running there, queue here */
1450 spin_unlock(&last_pool->lock);
Tejun Heo112202d2013-02-13 19:29:12 -08001451 spin_lock(&pwq->pool->lock);
Tejun Heo8930cab2012-08-03 10:30:45 -07001452 }
Tejun Heof3421792010-07-02 10:03:51 +02001453 } else {
Tejun Heo112202d2013-02-13 19:29:12 -08001454 spin_lock(&pwq->pool->lock);
Tejun Heo502ca9d2010-06-29 10:07:13 +02001455 }
1456
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07001457 /*
1458 * pwq is determined and locked. For unbound pools, we could have
1459 * raced with pwq release and it could already be dead. If its
1460 * refcnt is zero, repeat pwq selection. Note that pwqs never die
Tejun Heodf2d5ae2013-04-01 11:23:35 -07001461 * without another pwq replacing it in the numa_pwq_tbl or while
1462 * work items are executing on it, so the retrying is guaranteed to
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07001463 * make forward-progress.
1464 */
1465 if (unlikely(!pwq->refcnt)) {
1466 if (wq->flags & WQ_UNBOUND) {
1467 spin_unlock(&pwq->pool->lock);
1468 cpu_relax();
1469 goto retry;
1470 }
1471 /* oops */
1472 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1473 wq->name, cpu);
1474 }
1475
Tejun Heo112202d2013-02-13 19:29:12 -08001476 /* pwq determined, queue */
1477 trace_workqueue_queue_work(req_cpu, pwq, work);
Tejun Heo502ca9d2010-06-29 10:07:13 +02001478
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001479 if (WARN_ON(!list_empty(&work->entry)))
1480 goto out;
Tejun Heo1e19ffc2010-06-29 10:07:12 +02001481
Tejun Heo112202d2013-02-13 19:29:12 -08001482 pwq->nr_in_flight[pwq->work_color]++;
1483 work_flags = work_color_to_flags(pwq->work_color);
Tejun Heo1e19ffc2010-06-29 10:07:12 +02001484
Tejun Heo112202d2013-02-13 19:29:12 -08001485 if (likely(pwq->nr_active < pwq->max_active)) {
Tejun Heocdadf002010-10-05 10:49:55 +02001486 trace_workqueue_activate_work(work);
Tejun Heo112202d2013-02-13 19:29:12 -08001487 pwq->nr_active++;
1488 worklist = &pwq->pool->worklist;
Tejun Heo82607adc2015-12-08 11:28:04 -05001489 if (list_empty(worklist))
1490 pwq->pool->watchdog_ts = jiffies;
Tejun Heo8a2e8e5d2010-08-25 10:33:56 +02001491 } else {
1492 work_flags |= WORK_STRUCT_DELAYED;
Tejun Heo112202d2013-02-13 19:29:12 -08001493 worklist = &pwq->delayed_works;
Tejun Heo8a2e8e5d2010-08-25 10:33:56 +02001494 }
Tejun Heo1e19ffc2010-06-29 10:07:12 +02001495
Tejun Heo112202d2013-02-13 19:29:12 -08001496 insert_work(pwq, work, worklist, work_flags);
Tejun Heo1e19ffc2010-06-29 10:07:12 +02001497
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001498out:
Tejun Heo112202d2013-02-13 19:29:12 -08001499 spin_unlock(&pwq->pool->lock);
Thomas Gleixner24acfb72019-03-13 17:55:47 +01001500 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501}
1502
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07001503/**
Zhang Ruic1a220e2008-07-23 21:28:39 -07001504 * queue_work_on - queue work on specific cpu
1505 * @cpu: CPU number to execute work on
1506 * @wq: workqueue to use
1507 * @work: work to queue
1508 *
Zhang Ruic1a220e2008-07-23 21:28:39 -07001509 * We queue the work to a specific CPU; the caller must ensure that
1510 * the CPU can't go away.
Yacine Belkadid185af32013-07-31 14:59:24 -07001511 *
1512 * Return: %false if @work was already on a queue, %true otherwise.
Zhang Ruic1a220e2008-07-23 21:28:39 -07001513 */
Tejun Heod4283e92012-08-03 10:30:44 -07001514bool queue_work_on(int cpu, struct workqueue_struct *wq,
1515 struct work_struct *work)
Zhang Ruic1a220e2008-07-23 21:28:39 -07001516{
Tejun Heod4283e92012-08-03 10:30:44 -07001517 bool ret = false;
Tejun Heo8930cab2012-08-03 10:30:45 -07001518 unsigned long flags;
1519
1520 local_irq_save(flags);
Zhang Ruic1a220e2008-07-23 21:28:39 -07001521
Tejun Heo22df02b2010-06-29 10:07:10 +02001522 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
Tejun Heo4690c4a2010-06-29 10:07:10 +02001523 __queue_work(cpu, wq, work);
Tejun Heod4283e92012-08-03 10:30:44 -07001524 ret = true;
Zhang Ruic1a220e2008-07-23 21:28:39 -07001525 }
Tejun Heo8930cab2012-08-03 10:30:45 -07001526
1527 local_irq_restore(flags);
Zhang Ruic1a220e2008-07-23 21:28:39 -07001528 return ret;
1529}
Marc Dionnead7b1f82013-05-06 17:44:55 -04001530EXPORT_SYMBOL(queue_work_on);
Zhang Ruic1a220e2008-07-23 21:28:39 -07001531
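/*
 * Illustrative usage sketch (not part of workqueue.c): queueing a work item
 * on a specific CPU with queue_work_on().  The example_* names are
 * hypothetical and exist only to show the calling pattern.
 */
static void example_cpu_work_fn(struct work_struct *work)
{
	pr_info("example work running on CPU%d\n", raw_smp_processor_id());
}

static DECLARE_WORK(example_cpu_work, example_cpu_work_fn);

static void example_queue_on_cpu(int cpu)
{
	/* the caller must ensure @cpu can't go away while the work is queued */
	if (!queue_work_on(cpu, system_wq, &example_cpu_work))
		pr_info("example work was already pending\n");
}
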
Alexander Duyck8204e0c2019-01-22 10:39:26 -08001532/**
1533 * workqueue_select_cpu_near - Select a CPU based on NUMA node
1534 * @node: NUMA node ID that we want to select a CPU from
1535 *
1536 * This function will attempt to find a "random" cpu available on a given
1537 * node. If there are no CPUs available on the given node it will return
1538 * WORK_CPU_UNBOUND indicating that we should just schedule to any
1539 * available CPU if we need to schedule this work.
1540 */
1541static int workqueue_select_cpu_near(int node)
1542{
1543 int cpu;
1544
1545 /* No point in doing this if NUMA isn't enabled for workqueues */
1546 if (!wq_numa_enabled)
1547 return WORK_CPU_UNBOUND;
1548
1549 /* Delay binding to CPU if node is not valid or online */
1550 if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1551 return WORK_CPU_UNBOUND;
1552
1553 /* Use local node/cpu if we are already there */
1554 cpu = raw_smp_processor_id();
1555 if (node == cpu_to_node(cpu))
1556 return cpu;
1557
1558	/* Use "random", otherwise known as "first", online CPU of node */
1559 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1560
1561 /* If CPU is valid return that, otherwise just defer */
1562 return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1563}
1564
1565/**
1566 * queue_work_node - queue work on a "random" cpu for a given NUMA node
1567 * @node: NUMA node that we are targeting the work for
1568 * @wq: workqueue to use
1569 * @work: work to queue
1570 *
1571 * We queue the work to a "random" CPU within a given NUMA node. The basic
1572 * idea here is to provide a way to somehow associate work with a given
1573 * NUMA node.
1574 *
1575 * This function will only make a best effort attempt at getting this onto
1576 * the right NUMA node. If no node is requested or the requested node is
1577 * offline then we just fall back to standard queue_work behavior.
1578 *
1579 * Currently the "random" CPU ends up being the first available CPU in the
1580 * intersection of cpu_online_mask and the cpumask of the node, unless we
1581 * are running on the node. In that case we just use the current CPU.
1582 *
1583 * Return: %false if @work was already on a queue, %true otherwise.
1584 */
1585bool queue_work_node(int node, struct workqueue_struct *wq,
1586 struct work_struct *work)
1587{
1588 unsigned long flags;
1589 bool ret = false;
1590
1591 /*
1592 * This current implementation is specific to unbound workqueues.
1593 * Specifically we only return the first available CPU for a given
1594 * node instead of cycling through individual CPUs within the node.
1595 *
1596 * If this is used with a per-cpu workqueue then the logic in
1597 * workqueue_select_cpu_near would need to be updated to allow for
1598 * some round robin type logic.
1599 */
1600 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1601
1602 local_irq_save(flags);
1603
1604 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1605 int cpu = workqueue_select_cpu_near(node);
1606
1607 __queue_work(cpu, wq, work);
1608 ret = true;
1609 }
1610
1611 local_irq_restore(flags);
1612 return ret;
1613}
1614EXPORT_SYMBOL_GPL(queue_work_node);
1615
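/*
 * Illustrative usage sketch (not part of workqueue.c): queueing NUMA-local
 * work with queue_work_node().  Only unbound workqueues are supported, hence
 * system_unbound_wq.  The example_* names are hypothetical.
 */
static void example_numa_work_fn(struct work_struct *work)
{
	pr_info("example NUMA work on node %d\n",
		cpu_to_node(raw_smp_processor_id()));
}

static DECLARE_WORK(example_numa_work, example_numa_work_fn);

static void example_queue_near_node(int node)
{
	/* falls back to any CPU if @node is invalid or has no online CPU */
	queue_work_node(node, system_unbound_wq, &example_numa_work);
}
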
Kees Cook8c20feb2017-10-04 16:27:07 -07001616void delayed_work_timer_fn(struct timer_list *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617{
Kees Cook8c20feb2017-10-04 16:27:07 -07001618 struct delayed_work *dwork = from_timer(dwork, t, timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619
Tejun Heoe0aecdd2012-08-21 13:18:24 -07001620 /* should have been called from irqsafe timer with irq already off */
Lai Jiangshan60c057b2013-02-06 18:04:53 -08001621 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622}
Konstantin Khlebnikov1438ade52013-01-24 16:36:31 +04001623EXPORT_SYMBOL(delayed_work_timer_fn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624
Tejun Heo7beb2ed2012-08-03 10:30:46 -07001625static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1626 struct delayed_work *dwork, unsigned long delay)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627{
Tejun Heo7beb2ed2012-08-03 10:30:46 -07001628 struct timer_list *timer = &dwork->timer;
1629 struct work_struct *work = &dwork->work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630
Tejun Heo637fdba2017-03-06 15:33:42 -05001631 WARN_ON_ONCE(!wq);
Kees Cook841b86f2017-10-23 09:40:42 +02001632 WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
Tejun Heofc4b5142012-12-04 07:40:39 -08001633 WARN_ON_ONCE(timer_pending(timer));
1634 WARN_ON_ONCE(!list_empty(&work->entry));
Tejun Heo7beb2ed2012-08-03 10:30:46 -07001635
Tejun Heo8852aac2012-12-01 16:23:42 -08001636 /*
1637 * If @delay is 0, queue @dwork->work immediately. This is for
1638 * both optimization and correctness. The earliest @timer can
1639 * expire is on the closest next tick and delayed_work users depend
1640 * on that there's no such delay when @delay is 0.
1641 */
1642 if (!delay) {
1643 __queue_work(cpu, wq, &dwork->work);
1644 return;
1645 }
1646
Lai Jiangshan60c057b2013-02-06 18:04:53 -08001647 dwork->wq = wq;
Tejun Heo12650572012-08-08 09:38:42 -07001648 dwork->cpu = cpu;
Tejun Heo7beb2ed2012-08-03 10:30:46 -07001649 timer->expires = jiffies + delay;
1650
Tejun Heo041bd122016-02-09 16:11:26 -05001651 if (unlikely(cpu != WORK_CPU_UNBOUND))
1652 add_timer_on(timer, cpu);
1653 else
1654 add_timer(timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655}
1656
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07001657/**
1658 * queue_delayed_work_on - queue work on specific CPU after delay
1659 * @cpu: CPU number to execute work on
1660 * @wq: workqueue to use
Randy Dunlapaf9997e2006-12-22 01:06:52 -08001661 * @dwork: work to queue
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07001662 * @delay: number of jiffies to wait before queueing
1663 *
Yacine Belkadid185af32013-07-31 14:59:24 -07001664 * Return: %false if @work was already on a queue, %true otherwise. If
Tejun Heo715f1302012-08-03 10:30:46 -07001665 * @delay is zero and @dwork is idle, it will be scheduled for immediate
1666 * execution.
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07001667 */
Tejun Heod4283e92012-08-03 10:30:44 -07001668bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1669 struct delayed_work *dwork, unsigned long delay)
Venkatesh Pallipadi7a6bc1c2006-06-28 13:50:33 -07001670{
David Howells52bad642006-11-22 14:54:01 +00001671 struct work_struct *work = &dwork->work;
Tejun Heod4283e92012-08-03 10:30:44 -07001672 bool ret = false;
Tejun Heo8930cab2012-08-03 10:30:45 -07001673 unsigned long flags;
1674
1675 /* read the comment in __queue_work() */
1676 local_irq_save(flags);
Venkatesh Pallipadi7a6bc1c2006-06-28 13:50:33 -07001677
Tejun Heo22df02b2010-06-29 10:07:10 +02001678 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
Tejun Heo7beb2ed2012-08-03 10:30:46 -07001679 __queue_delayed_work(cpu, wq, dwork, delay);
Tejun Heod4283e92012-08-03 10:30:44 -07001680 ret = true;
Venkatesh Pallipadi7a6bc1c2006-06-28 13:50:33 -07001681 }
Tejun Heo8930cab2012-08-03 10:30:45 -07001682
1683 local_irq_restore(flags);
Venkatesh Pallipadi7a6bc1c2006-06-28 13:50:33 -07001684 return ret;
1685}
Marc Dionnead7b1f82013-05-06 17:44:55 -04001686EXPORT_SYMBOL(queue_delayed_work_on);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687
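/*
 * Illustrative usage sketch (not part of workqueue.c): arming a delayed work
 * item with queue_delayed_work_on().  A zero delay queues the work
 * immediately, as documented above.  The example_* names are hypothetical.
 */
static void example_delayed_fn(struct work_struct *work)
{
	pr_info("example delayed work ran\n");
}

static DECLARE_DELAYED_WORK(example_dwork, example_delayed_fn);

static void example_arm_delayed(void)
{
	/* run on whatever CPU the workqueue picks, roughly one second from now */
	queue_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &example_dwork,
			      msecs_to_jiffies(1000));
}
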
Tejun Heoc8e55f32010-06-29 10:07:12 +02001688/**
Tejun Heo8376fe22012-08-03 10:30:47 -07001689 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
1690 * @cpu: CPU number to execute work on
1691 * @wq: workqueue to use
1692 * @dwork: work to queue
1693 * @delay: number of jiffies to wait before queueing
1694 *
1695 * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
1696 * modify @dwork's timer so that it expires after @delay. If @delay is
1697 * zero, @work is guaranteed to be scheduled immediately regardless of its
1698 * current state.
1699 *
Yacine Belkadid185af32013-07-31 14:59:24 -07001700 * Return: %false if @dwork was idle and queued, %true if @dwork was
Tejun Heo8376fe22012-08-03 10:30:47 -07001701 * pending and its timer was modified.
1702 *
Tejun Heoe0aecdd2012-08-21 13:18:24 -07001703 * This function is safe to call from any context including IRQ handler.
Tejun Heo8376fe22012-08-03 10:30:47 -07001704 * See try_to_grab_pending() for details.
1705 */
1706bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1707 struct delayed_work *dwork, unsigned long delay)
1708{
1709 unsigned long flags;
1710 int ret;
1711
1712 do {
1713 ret = try_to_grab_pending(&dwork->work, true, &flags);
1714 } while (unlikely(ret == -EAGAIN));
1715
1716 if (likely(ret >= 0)) {
1717 __queue_delayed_work(cpu, wq, dwork, delay);
1718 local_irq_restore(flags);
1719 }
1720
1721 /* -ENOENT from try_to_grab_pending() becomes %true */
1722 return ret;
1723}
1724EXPORT_SYMBOL_GPL(mod_delayed_work_on);
1725
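/*
 * Illustrative usage sketch (not part of workqueue.c): the common "debounce"
 * pattern built on mod_delayed_work_on() - every call pushes the expiry out
 * again, so the handler runs only once the event source has been quiet for
 * the full delay.  The example_* names are hypothetical.
 */
static void example_quiet_fn(struct work_struct *work)
{
	pr_info("example: no events for 100ms\n");
}

static DECLARE_DELAYED_WORK(example_quiet_work, example_quiet_fn);

static void example_note_event(void)
{
	/* (re)arm the timer; safe from any context including IRQ handlers */
	mod_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &example_quiet_work,
			    msecs_to_jiffies(100));
}
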
Tejun Heo05f0fe62018-03-14 12:45:13 -07001726static void rcu_work_rcufn(struct rcu_head *rcu)
1727{
1728 struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
1729
1730 /* read the comment in __queue_work() */
1731 local_irq_disable();
1732 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
1733 local_irq_enable();
1734}
1735
1736/**
1737 * queue_rcu_work - queue work after a RCU grace period
1738 * @wq: workqueue to use
1739 * @rwork: work to queue
1740 *
1741 * Return: %false if @rwork was already pending, %true otherwise. Note
1742 * that a full RCU grace period is guaranteed only after a %true return.
Bart Van Asschebf393fd2019-03-01 13:57:25 -08001743 * While @rwork is guaranteed to be executed after a %false return, the
Tejun Heo05f0fe62018-03-14 12:45:13 -07001744 * execution may happen before a full RCU grace period has passed.
1745 */
1746bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
1747{
1748 struct work_struct *work = &rwork->work;
1749
1750 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1751 rwork->wq = wq;
1752 call_rcu(&rwork->rcu, rcu_work_rcufn);
1753 return true;
1754 }
1755
1756 return false;
1757}
1758EXPORT_SYMBOL(queue_rcu_work);
1759
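/*
 * Illustrative usage sketch (not part of workqueue.c): deferring object
 * destruction past an RCU grace period with queue_rcu_work(), an alternative
 * to call_rcu() when the cleanup needs process context.  The example_* names
 * are hypothetical.
 */
struct example_obj {
	struct rcu_work rwork;
	/* ... payload ... */
};

static void example_free_fn(struct work_struct *work)
{
	struct example_obj *obj =
		container_of(to_rcu_work(work), struct example_obj, rwork);

	/* runs in process context after a full RCU grace period */
	kfree(obj);
}

static void example_defer_free(struct example_obj *obj)
{
	INIT_RCU_WORK(&obj->rwork, example_free_fn);
	queue_rcu_work(system_wq, &obj->rwork);
}
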
Tejun Heo8376fe22012-08-03 10:30:47 -07001760/**
Tejun Heoc8e55f32010-06-29 10:07:12 +02001761 * worker_enter_idle - enter idle state
1762 * @worker: worker which is entering idle state
1763 *
1764 * @worker is entering idle state. Update stats and idle timer if
1765 * necessary.
1766 *
1767 * LOCKING:
Tejun Heod565ed62013-01-24 11:01:33 -08001768 * spin_lock_irq(pool->lock).
Tejun Heoc8e55f32010-06-29 10:07:12 +02001769 */
1770static void worker_enter_idle(struct worker *worker)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771{
Tejun Heobd7bdd42012-07-12 14:46:37 -07001772 struct worker_pool *pool = worker->pool;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
Tejun Heo6183c002013-03-12 11:29:57 -07001774 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1775 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1776 (worker->hentry.next || worker->hentry.pprev)))
1777 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778
Lai Jiangshan051e1852014-07-22 13:03:02 +08001779 /* can't use worker_set_flags(), also called from create_worker() */
Tejun Heocb444762010-07-02 10:03:50 +02001780 worker->flags |= WORKER_IDLE;
Tejun Heobd7bdd42012-07-12 14:46:37 -07001781 pool->nr_idle++;
Tejun Heoe22bee72010-06-29 10:07:14 +02001782 worker->last_active = jiffies;
Peter Zijlstrad5abe662006-12-06 20:37:26 -08001783
Tejun Heoc8e55f32010-06-29 10:07:12 +02001784 /* idle_list is LIFO */
Tejun Heobd7bdd42012-07-12 14:46:37 -07001785 list_add(&worker->entry, &pool->idle_list);
Tejun Heodb7bccf2010-06-29 10:07:12 +02001786
Tejun Heo628c78e2012-07-17 12:39:27 -07001787 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1788 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
Tejun Heocb444762010-07-02 10:03:50 +02001789
Tejun Heo544ecf32012-05-14 15:04:50 -07001790 /*
Lai Jiangshane8b3f8d2017-12-01 22:20:36 +08001791 * Sanity check nr_running. Because unbind_workers() releases
Tejun Heod565ed62013-01-24 11:01:33 -08001792 * pool->lock between setting %WORKER_UNBOUND and zapping
Tejun Heo628c78e2012-07-17 12:39:27 -07001793 * nr_running, the warning may trigger spuriously. Check iff
1794 * unbind is not in progress.
Tejun Heo544ecf32012-05-14 15:04:50 -07001795 */
Tejun Heo24647572013-01-24 11:01:33 -08001796 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
Tejun Heobd7bdd42012-07-12 14:46:37 -07001797 pool->nr_workers == pool->nr_idle &&
Tejun Heoe19e3972013-01-24 11:39:44 -08001798 atomic_read(&pool->nr_running));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799}
1800
Tejun Heoc8e55f32010-06-29 10:07:12 +02001801/**
1802 * worker_leave_idle - leave idle state
1803 * @worker: worker which is leaving idle state
1804 *
1805 * @worker is leaving idle state. Update stats.
1806 *
1807 * LOCKING:
Tejun Heod565ed62013-01-24 11:01:33 -08001808 * spin_lock_irq(pool->lock).
Tejun Heoc8e55f32010-06-29 10:07:12 +02001809 */
1810static void worker_leave_idle(struct worker *worker)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811{
Tejun Heobd7bdd42012-07-12 14:46:37 -07001812 struct worker_pool *pool = worker->pool;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
Tejun Heo6183c002013-03-12 11:29:57 -07001814 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1815 return;
Tejun Heod302f012010-06-29 10:07:13 +02001816 worker_clr_flags(worker, WORKER_IDLE);
Tejun Heobd7bdd42012-07-12 14:46:37 -07001817 pool->nr_idle--;
Tejun Heoc8e55f32010-06-29 10:07:12 +02001818 list_del_init(&worker->entry);
1819}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
Lai Jiangshanf7537df2014-07-15 17:24:15 +08001821static struct worker *alloc_worker(int node)
Tejun Heoc34056a2010-06-29 10:07:11 +02001822{
1823 struct worker *worker;
1824
Lai Jiangshanf7537df2014-07-15 17:24:15 +08001825 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
Tejun Heoc8e55f32010-06-29 10:07:12 +02001826 if (worker) {
1827 INIT_LIST_HEAD(&worker->entry);
Tejun Heoaffee4b2010-06-29 10:07:12 +02001828 INIT_LIST_HEAD(&worker->scheduled);
Lai Jiangshanda028462014-05-20 17:46:31 +08001829 INIT_LIST_HEAD(&worker->node);
Tejun Heoe22bee72010-06-29 10:07:14 +02001830 /* on creation a worker is in !idle && prep state */
1831 worker->flags = WORKER_PREP;
Tejun Heoc8e55f32010-06-29 10:07:12 +02001832 }
Tejun Heoc34056a2010-06-29 10:07:11 +02001833 return worker;
1834}
1835
1836/**
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001837 * worker_attach_to_pool() - attach a worker to a pool
1838 * @worker: worker to be attached
1839 * @pool: the target pool
1840 *
1841 * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
1842 * cpu-binding of @worker are kept coordinated with the pool across
1843 * cpu-[un]hotplugs.
1844 */
1845static void worker_attach_to_pool(struct worker *worker,
1846 struct worker_pool *pool)
1847{
Tejun Heo1258fae2018-05-18 08:47:13 -07001848 mutex_lock(&wq_pool_attach_mutex);
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001849
1850 /*
1851 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
1852 * online CPUs. It'll be re-applied when any of the CPUs come up.
1853 */
1854 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1855
1856 /*
Tejun Heo1258fae2018-05-18 08:47:13 -07001857 * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
1858 * stable across this function. See the comments above the flag
1859 * definition for details.
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001860 */
1861 if (pool->flags & POOL_DISASSOCIATED)
1862 worker->flags |= WORKER_UNBOUND;
1863
1864 list_add_tail(&worker->node, &pool->workers);
Tejun Heoa2d812a2018-05-18 08:47:13 -07001865 worker->pool = pool;
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001866
Tejun Heo1258fae2018-05-18 08:47:13 -07001867 mutex_unlock(&wq_pool_attach_mutex);
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001868}
1869
1870/**
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001871 * worker_detach_from_pool() - detach a worker from its pool
1872 * @worker: worker which is attached to its pool
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001873 *
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001874 * Undo the attaching which had been done in worker_attach_to_pool(). The
1875 * caller worker shouldn't access the pool after being detached unless it
1876 * holds another reference to the pool.
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001877 */
Tejun Heoa2d812a2018-05-18 08:47:13 -07001878static void worker_detach_from_pool(struct worker *worker)
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001879{
Tejun Heoa2d812a2018-05-18 08:47:13 -07001880 struct worker_pool *pool = worker->pool;
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001881 struct completion *detach_completion = NULL;
1882
Tejun Heo1258fae2018-05-18 08:47:13 -07001883 mutex_lock(&wq_pool_attach_mutex);
Tejun Heoa2d812a2018-05-18 08:47:13 -07001884
Lai Jiangshanda028462014-05-20 17:46:31 +08001885 list_del(&worker->node);
Tejun Heoa2d812a2018-05-18 08:47:13 -07001886 worker->pool = NULL;
1887
Lai Jiangshanda028462014-05-20 17:46:31 +08001888 if (list_empty(&pool->workers))
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001889 detach_completion = pool->detach_completion;
Tejun Heo1258fae2018-05-18 08:47:13 -07001890 mutex_unlock(&wq_pool_attach_mutex);
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001891
Lai Jiangshanb62c0752014-06-03 15:32:52 +08001892 /* clear leftover flags without pool->lock after it is detached */
1893 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1894
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001895 if (detach_completion)
1896 complete(detach_completion);
1897}
1898
1899/**
Tejun Heoc34056a2010-06-29 10:07:11 +02001900 * create_worker - create a new workqueue worker
Tejun Heo63d95a92012-07-12 14:46:37 -07001901 * @pool: pool the new worker will belong to
Tejun Heoc34056a2010-06-29 10:07:11 +02001902 *
Lai Jiangshan051e1852014-07-22 13:03:02 +08001903 * Create and start a new worker which is attached to @pool.
Tejun Heoc34056a2010-06-29 10:07:11 +02001904 *
1905 * CONTEXT:
1906 * Might sleep. Does GFP_KERNEL allocations.
1907 *
Yacine Belkadid185af32013-07-31 14:59:24 -07001908 * Return:
Tejun Heoc34056a2010-06-29 10:07:11 +02001909 * Pointer to the newly created worker.
1910 */
Tejun Heobc2ae0f2012-07-17 12:39:27 -07001911static struct worker *create_worker(struct worker_pool *pool)
Tejun Heoc34056a2010-06-29 10:07:11 +02001912{
Tejun Heoc34056a2010-06-29 10:07:11 +02001913 struct worker *worker = NULL;
Tejun Heof3421792010-07-02 10:03:51 +02001914 int id = -1;
Tejun Heoe3c916a2013-04-01 11:23:32 -07001915 char id_buf[16];
Tejun Heoc34056a2010-06-29 10:07:11 +02001916
Lai Jiangshan7cda9aa2014-05-20 17:46:32 +08001917 /* ID is needed to determine kthread name */
1918 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
Tejun Heo822d8402013-03-19 13:45:21 -07001919 if (id < 0)
1920 goto fail;
Tejun Heoc34056a2010-06-29 10:07:11 +02001921
Lai Jiangshanf7537df2014-07-15 17:24:15 +08001922 worker = alloc_worker(pool->node);
Tejun Heoc34056a2010-06-29 10:07:11 +02001923 if (!worker)
1924 goto fail;
1925
Tejun Heoc34056a2010-06-29 10:07:11 +02001926 worker->id = id;
1927
Tejun Heo29c91e92013-03-12 11:30:03 -07001928 if (pool->cpu >= 0)
Tejun Heoe3c916a2013-04-01 11:23:32 -07001929 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1930 pool->attrs->nice < 0 ? "H" : "");
Tejun Heof3421792010-07-02 10:03:51 +02001931 else
Tejun Heoe3c916a2013-04-01 11:23:32 -07001932 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1933
Tejun Heof3f90ad2013-04-01 11:23:34 -07001934 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
Tejun Heoe3c916a2013-04-01 11:23:32 -07001935 "kworker/%s", id_buf);
Tejun Heoc34056a2010-06-29 10:07:11 +02001936 if (IS_ERR(worker->task))
1937 goto fail;
1938
Oleg Nesterov91151222013-11-14 12:56:18 +01001939 set_user_nice(worker->task, pool->attrs->nice);
Peter Zijlstra25834c72015-05-15 17:43:34 +02001940 kthread_bind_mask(worker->task, pool->attrs->cpumask);
Oleg Nesterov91151222013-11-14 12:56:18 +01001941
Lai Jiangshanda028462014-05-20 17:46:31 +08001942 /* successful, attach the worker to the pool */
Lai Jiangshan4736cbf2014-05-20 17:46:35 +08001943 worker_attach_to_pool(worker, pool);
Tejun Heo822d8402013-03-19 13:45:21 -07001944
Lai Jiangshan051e1852014-07-22 13:03:02 +08001945 /* start the newly created worker */
1946 spin_lock_irq(&pool->lock);
1947 worker->pool->nr_workers++;
1948 worker_enter_idle(worker);
1949 wake_up_process(worker->task);
1950 spin_unlock_irq(&pool->lock);
1951
Tejun Heoc34056a2010-06-29 10:07:11 +02001952 return worker;
Tejun Heo822d8402013-03-19 13:45:21 -07001953
Tejun Heoc34056a2010-06-29 10:07:11 +02001954fail:
Lai Jiangshan9625ab12014-05-20 17:46:27 +08001955 if (id >= 0)
Lai Jiangshan7cda9aa2014-05-20 17:46:32 +08001956 ida_simple_remove(&pool->worker_ida, id);
Tejun Heoc34056a2010-06-29 10:07:11 +02001957 kfree(worker);
1958 return NULL;
1959}
1960
1961/**
Tejun Heoc34056a2010-06-29 10:07:11 +02001962 * destroy_worker - destroy a workqueue worker
1963 * @worker: worker to be destroyed
1964 *
Lai Jiangshan73eb7fe2014-05-20 17:46:28 +08001965 * Destroy @worker and adjust @pool stats accordingly. The worker should
1966 * be idle.
Tejun Heoc8e55f32010-06-29 10:07:12 +02001967 *
1968 * CONTEXT:
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001969 * spin_lock_irq(pool->lock).
Tejun Heoc34056a2010-06-29 10:07:11 +02001970 */
1971static void destroy_worker(struct worker *worker)
1972{
Tejun Heobd7bdd42012-07-12 14:46:37 -07001973 struct worker_pool *pool = worker->pool;
Tejun Heoc34056a2010-06-29 10:07:11 +02001974
Tejun Heocd549682013-03-13 19:47:39 -07001975 lockdep_assert_held(&pool->lock);
1976
Tejun Heoc34056a2010-06-29 10:07:11 +02001977 /* sanity check frenzy */
Tejun Heo6183c002013-03-12 11:29:57 -07001978 if (WARN_ON(worker->current_work) ||
Lai Jiangshan73eb7fe2014-05-20 17:46:28 +08001979 WARN_ON(!list_empty(&worker->scheduled)) ||
1980 WARN_ON(!(worker->flags & WORKER_IDLE)))
Tejun Heo6183c002013-03-12 11:29:57 -07001981 return;
Tejun Heoc34056a2010-06-29 10:07:11 +02001982
Lai Jiangshan73eb7fe2014-05-20 17:46:28 +08001983 pool->nr_workers--;
1984 pool->nr_idle--;
Lai Jiangshan5bdfff92014-02-15 22:02:28 +08001985
Tejun Heoc8e55f32010-06-29 10:07:12 +02001986 list_del_init(&worker->entry);
Tejun Heocb444762010-07-02 10:03:50 +02001987 worker->flags |= WORKER_DIE;
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08001988 wake_up_process(worker->task);
Tejun Heoc34056a2010-06-29 10:07:11 +02001989}
1990
Kees Cook32a6c722017-10-16 15:58:25 -07001991static void idle_worker_timeout(struct timer_list *t)
Tejun Heoe22bee72010-06-29 10:07:14 +02001992{
Kees Cook32a6c722017-10-16 15:58:25 -07001993 struct worker_pool *pool = from_timer(pool, t, idle_timer);
Tejun Heoe22bee72010-06-29 10:07:14 +02001994
Tejun Heod565ed62013-01-24 11:01:33 -08001995 spin_lock_irq(&pool->lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02001996
Lai Jiangshan3347fc92014-05-20 17:46:30 +08001997 while (too_many_workers(pool)) {
Tejun Heoe22bee72010-06-29 10:07:14 +02001998 struct worker *worker;
1999 unsigned long expires;
2000
2001 /* idle_list is kept in LIFO order, check the last one */
Tejun Heo63d95a92012-07-12 14:46:37 -07002002 worker = list_entry(pool->idle_list.prev, struct worker, entry);
Tejun Heoe22bee72010-06-29 10:07:14 +02002003 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2004
Lai Jiangshan3347fc92014-05-20 17:46:30 +08002005 if (time_before(jiffies, expires)) {
Tejun Heo63d95a92012-07-12 14:46:37 -07002006 mod_timer(&pool->idle_timer, expires);
Lai Jiangshan3347fc92014-05-20 17:46:30 +08002007 break;
Tejun Heoe22bee72010-06-29 10:07:14 +02002008 }
Lai Jiangshan3347fc92014-05-20 17:46:30 +08002009
2010 destroy_worker(worker);
Tejun Heoe22bee72010-06-29 10:07:14 +02002011 }
2012
Tejun Heod565ed62013-01-24 11:01:33 -08002013 spin_unlock_irq(&pool->lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02002014}
2015
Tejun Heo493a1722013-03-12 11:29:59 -07002016static void send_mayday(struct work_struct *work)
Tejun Heoe22bee72010-06-29 10:07:14 +02002017{
Tejun Heo112202d2013-02-13 19:29:12 -08002018 struct pool_workqueue *pwq = get_work_pwq(work);
2019 struct workqueue_struct *wq = pwq->wq;
Tejun Heo493a1722013-03-12 11:29:59 -07002020
Tejun Heo2e109a22013-03-13 19:47:40 -07002021 lockdep_assert_held(&wq_mayday_lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02002022
Tejun Heo493008a2013-03-12 11:30:03 -07002023 if (!wq->rescuer)
Tejun Heo493a1722013-03-12 11:29:59 -07002024 return;
Tejun Heoe22bee72010-06-29 10:07:14 +02002025
2026 /* mayday mayday mayday */
Tejun Heo493a1722013-03-12 11:29:59 -07002027 if (list_empty(&pwq->mayday_node)) {
Lai Jiangshan77668c82014-04-18 11:04:16 -04002028 /*
2029 * If @pwq is for an unbound wq, its base ref may be put at
2030 * any time due to an attribute change. Pin @pwq until the
2031 * rescuer is done with it.
2032 */
2033 get_pwq(pwq);
Tejun Heo493a1722013-03-12 11:29:59 -07002034 list_add_tail(&pwq->mayday_node, &wq->maydays);
Tejun Heoe22bee72010-06-29 10:07:14 +02002035 wake_up_process(wq->rescuer->task);
Tejun Heo493a1722013-03-12 11:29:59 -07002036 }
Tejun Heoe22bee72010-06-29 10:07:14 +02002037}
2038
Kees Cook32a6c722017-10-16 15:58:25 -07002039static void pool_mayday_timeout(struct timer_list *t)
Tejun Heoe22bee72010-06-29 10:07:14 +02002040{
Kees Cook32a6c722017-10-16 15:58:25 -07002041 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
Tejun Heoe22bee72010-06-29 10:07:14 +02002042 struct work_struct *work;
2043
Tejun Heob2d82902014-12-08 12:39:16 -05002044 spin_lock_irq(&pool->lock);
2045 spin_lock(&wq_mayday_lock); /* for wq->maydays */
Tejun Heoe22bee72010-06-29 10:07:14 +02002046
Tejun Heo63d95a92012-07-12 14:46:37 -07002047 if (need_to_create_worker(pool)) {
Tejun Heoe22bee72010-06-29 10:07:14 +02002048 /*
2049 * We've been trying to create a new worker but
2050 * haven't been successful. We might be hitting an
2051 * allocation deadlock. Send distress signals to
2052 * rescuers.
2053 */
Tejun Heo63d95a92012-07-12 14:46:37 -07002054 list_for_each_entry(work, &pool->worklist, entry)
Tejun Heoe22bee72010-06-29 10:07:14 +02002055 send_mayday(work);
2056 }
2057
Tejun Heob2d82902014-12-08 12:39:16 -05002058 spin_unlock(&wq_mayday_lock);
2059 spin_unlock_irq(&pool->lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02002060
Tejun Heo63d95a92012-07-12 14:46:37 -07002061 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
Tejun Heoe22bee72010-06-29 10:07:14 +02002062}
2063
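/*
 * Illustrative sketch (not part of workqueue.c): the mayday machinery above
 * only comes into play for workqueues that have a rescuer, i.e. those
 * allocated with WQ_MEM_RECLAIM.  A hypothetical user on a memory-reclaim
 * path would create such a workqueue like this; the example_* name is made
 * up for illustration.
 */
static struct workqueue_struct *example_create_reclaim_wq(void)
{
	/* WQ_MEM_RECLAIM guarantees a rescuer thread for this workqueue */
	return alloc_workqueue("example_reclaim", WQ_MEM_RECLAIM, 0);
}
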
2064/**
2065 * maybe_create_worker - create a new worker if necessary
Tejun Heo63d95a92012-07-12 14:46:37 -07002066 * @pool: pool to create a new worker for
Tejun Heoe22bee72010-06-29 10:07:14 +02002067 *
Tejun Heo63d95a92012-07-12 14:46:37 -07002068 * Create a new worker for @pool if necessary. @pool is guaranteed to
Tejun Heoe22bee72010-06-29 10:07:14 +02002069 * have at least one idle worker on return from this function. If
2070 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
Tejun Heo63d95a92012-07-12 14:46:37 -07002071 * sent to all rescuers with works scheduled on @pool to resolve
Tejun Heoe22bee72010-06-29 10:07:14 +02002072 * possible allocation deadlock.
2073 *
Tejun Heoc5aa87b2013-03-13 16:51:36 -07002074 * On return, need_to_create_worker() is guaranteed to be %false and
2075 * may_start_working() %true.
Tejun Heoe22bee72010-06-29 10:07:14 +02002076 *
2077 * LOCKING:
Tejun Heod565ed62013-01-24 11:01:33 -08002078 * spin_lock_irq(pool->lock) which may be released and regrabbed
Tejun Heoe22bee72010-06-29 10:07:14 +02002079 * multiple times. Does GFP_KERNEL allocations. Called only from
2080 * manager.
Tejun Heoe22bee72010-06-29 10:07:14 +02002081 */
Tejun Heo29187a92015-01-16 14:21:16 -05002082static void maybe_create_worker(struct worker_pool *pool)
Tejun Heod565ed62013-01-24 11:01:33 -08002083__releases(&pool->lock)
2084__acquires(&pool->lock)
Tejun Heoe22bee72010-06-29 10:07:14 +02002085{
Tejun Heoe22bee72010-06-29 10:07:14 +02002086restart:
Tejun Heod565ed62013-01-24 11:01:33 -08002087 spin_unlock_irq(&pool->lock);
Tejun Heo9f9c236442010-07-14 11:31:20 +02002088
Tejun Heoe22bee72010-06-29 10:07:14 +02002089 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
Tejun Heo63d95a92012-07-12 14:46:37 -07002090 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
Tejun Heoe22bee72010-06-29 10:07:14 +02002091
2092 while (true) {
Lai Jiangshan051e1852014-07-22 13:03:02 +08002093 if (create_worker(pool) || !need_to_create_worker(pool))
Tejun Heoe22bee72010-06-29 10:07:14 +02002094 break;
2095
Lai Jiangshane212f362014-06-03 15:32:17 +08002096 schedule_timeout_interruptible(CREATE_COOLDOWN);
Tejun Heo9f9c236442010-07-14 11:31:20 +02002097
Tejun Heo63d95a92012-07-12 14:46:37 -07002098 if (!need_to_create_worker(pool))
Tejun Heoe22bee72010-06-29 10:07:14 +02002099 break;
2100 }
2101
Tejun Heo63d95a92012-07-12 14:46:37 -07002102 del_timer_sync(&pool->mayday_timer);
Tejun Heod565ed62013-01-24 11:01:33 -08002103 spin_lock_irq(&pool->lock);
Lai Jiangshan051e1852014-07-22 13:03:02 +08002104 /*
2105 * This is necessary even after a new worker was just successfully
2106 * created as @pool->lock was dropped and the new worker might have
2107 * already become busy.
2108 */
Tejun Heo63d95a92012-07-12 14:46:37 -07002109 if (need_to_create_worker(pool))
Tejun Heoe22bee72010-06-29 10:07:14 +02002110 goto restart;
Tejun Heoe22bee72010-06-29 10:07:14 +02002111}
2112
2113/**
Tejun Heoe22bee72010-06-29 10:07:14 +02002114 * manage_workers - manage worker pool
2115 * @worker: self
2116 *
Tejun Heo706026c2013-01-24 11:01:34 -08002117 * Assume the manager role and manage the worker pool @worker belongs
Tejun Heoe22bee72010-06-29 10:07:14 +02002118 * to. At any given time, there can be only zero or one manager per
Tejun Heo706026c2013-01-24 11:01:34 -08002119 * pool. The exclusion is handled automatically by this function.
Tejun Heoe22bee72010-06-29 10:07:14 +02002120 *
2121 * The caller can safely start processing works on false return. On
2122 * true return, it's guaranteed that need_to_create_worker() is false
2123 * and may_start_working() is true.
2124 *
2125 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08002126 * spin_lock_irq(pool->lock) which may be released and regrabbed
Tejun Heoe22bee72010-06-29 10:07:14 +02002127 * multiple times. Does GFP_KERNEL allocations.
2128 *
Yacine Belkadid185af32013-07-31 14:59:24 -07002129 * Return:
Tejun Heo29187a92015-01-16 14:21:16 -05002130 * %false if the pool doesn't need management and the caller can safely
2131 * start processing works, %true if management function was performed and
2132 * the conditions that the caller verified before calling the function may
2133 * no longer be true.
Tejun Heoe22bee72010-06-29 10:07:14 +02002134 */
2135static bool manage_workers(struct worker *worker)
2136{
Tejun Heo63d95a92012-07-12 14:46:37 -07002137 struct worker_pool *pool = worker->pool;
Tejun Heoe22bee72010-06-29 10:07:14 +02002138
Tejun Heo692b4822017-10-09 08:04:13 -07002139 if (pool->flags & POOL_MANAGER_ACTIVE)
Tejun Heo29187a92015-01-16 14:21:16 -05002140 return false;
Tejun Heo692b4822017-10-09 08:04:13 -07002141
2142 pool->flags |= POOL_MANAGER_ACTIVE;
Tejun Heo2607d7a2015-03-09 09:22:28 -04002143 pool->manager = worker;
Tejun Heoe22bee72010-06-29 10:07:14 +02002144
Tejun Heo29187a92015-01-16 14:21:16 -05002145 maybe_create_worker(pool);
Tejun Heoe22bee72010-06-29 10:07:14 +02002146
Tejun Heo2607d7a2015-03-09 09:22:28 -04002147 pool->manager = NULL;
Tejun Heo692b4822017-10-09 08:04:13 -07002148 pool->flags &= ~POOL_MANAGER_ACTIVE;
2149 wake_up(&wq_manager_wait);
Tejun Heo29187a92015-01-16 14:21:16 -05002150 return true;
Tejun Heoe22bee72010-06-29 10:07:14 +02002151}
2152
Tejun Heoa62428c2010-06-29 10:07:10 +02002153/**
2154 * process_one_work - process single work
Tejun Heoc34056a2010-06-29 10:07:11 +02002155 * @worker: self
Tejun Heoa62428c2010-06-29 10:07:10 +02002156 * @work: work to process
2157 *
2158 * Process @work. This function contains all the logic necessary to
2159 * process a single work including synchronization against and
2160 * interaction with other workers on the same cpu, queueing and
2161 * flushing. As long as the context requirement is met, any worker can
2162 * call this function to process a work.
2163 *
2164 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08002165 * spin_lock_irq(pool->lock) which is released and regrabbed.
Tejun Heoa62428c2010-06-29 10:07:10 +02002166 */
Tejun Heoc34056a2010-06-29 10:07:11 +02002167static void process_one_work(struct worker *worker, struct work_struct *work)
Tejun Heod565ed62013-01-24 11:01:33 -08002168__releases(&pool->lock)
2169__acquires(&pool->lock)
Tejun Heoa62428c2010-06-29 10:07:10 +02002170{
Tejun Heo112202d2013-02-13 19:29:12 -08002171 struct pool_workqueue *pwq = get_work_pwq(work);
Tejun Heobd7bdd42012-07-12 14:46:37 -07002172 struct worker_pool *pool = worker->pool;
Tejun Heo112202d2013-02-13 19:29:12 -08002173 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
Tejun Heo73f53c42010-06-29 10:07:11 +02002174 int work_color;
Tejun Heo7e116292010-06-29 10:07:13 +02002175 struct worker *collision;
Tejun Heoa62428c2010-06-29 10:07:10 +02002176#ifdef CONFIG_LOCKDEP
2177 /*
2178 * It is permissible to free the struct work_struct from
2179 * inside the function that is called from it, this we need to
2180 * take into account for lockdep too. To avoid bogus "held
2181 * lock freed" warnings as well as problems when looking into
2182 * work->lockdep_map, make a copy and use that here.
2183 */
Peter Zijlstra4d82a1d2012-05-15 08:06:19 -07002184 struct lockdep_map lockdep_map;
2185
2186 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
Tejun Heoa62428c2010-06-29 10:07:10 +02002187#endif
Lai Jiangshan807407c2014-06-03 15:33:28 +08002188 /* ensure we're on the correct CPU */
Lai Jiangshan85327af2014-06-03 15:33:28 +08002189 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
Tejun Heoec22ca52013-01-24 11:01:33 -08002190 raw_smp_processor_id() != pool->cpu);
Tejun Heo25511a42012-07-17 12:39:27 -07002191
Tejun Heo7e116292010-06-29 10:07:13 +02002192 /*
2193 * A single work shouldn't be executed concurrently by
2194 * multiple workers on a single cpu. Check whether anyone is
2195 * already processing the work. If so, defer the work to the
2196 * currently executing one.
2197 */
Tejun Heoc9e7cf22013-01-24 11:01:33 -08002198 collision = find_worker_executing_work(pool, work);
Tejun Heo7e116292010-06-29 10:07:13 +02002199 if (unlikely(collision)) {
2200 move_linked_works(work, &collision->scheduled, NULL);
2201 return;
2202 }
2203
Tejun Heo8930cab2012-08-03 10:30:45 -07002204 /* claim and dequeue */
Tejun Heoa62428c2010-06-29 10:07:10 +02002205 debug_work_deactivate(work);
Tejun Heoc9e7cf22013-01-24 11:01:33 -08002206 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
Tejun Heoc34056a2010-06-29 10:07:11 +02002207 worker->current_work = work;
Tejun Heoa2c1c572012-12-18 10:35:02 -08002208 worker->current_func = work->func;
Tejun Heo112202d2013-02-13 19:29:12 -08002209 worker->current_pwq = pwq;
Tejun Heo73f53c42010-06-29 10:07:11 +02002210 work_color = get_work_color(work);
Tejun Heo7a22ad72010-06-29 10:07:13 +02002211
Tejun Heo8bf89592018-05-18 08:47:13 -07002212 /*
2213 * Record wq name for cmdline and debug reporting, may get
2214 * overridden through set_worker_desc().
2215 */
2216 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
2217
Tejun Heoa62428c2010-06-29 10:07:10 +02002218 list_del_init(&work->entry);
2219
Tejun Heo649027d2010-06-29 10:07:14 +02002220 /*
Lai Jiangshan228f1d02014-07-22 13:02:00 +08002221 * CPU intensive works don't participate in concurrency management.
2222 * They're the scheduler's responsibility. This takes @worker out
2223 * of concurrency management and the next code block will chain
2224 * execution of the pending work items.
Tejun Heofb0e7be2010-06-29 10:07:15 +02002225 */
2226 if (unlikely(cpu_intensive))
Lai Jiangshan228f1d02014-07-22 13:02:00 +08002227 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
Tejun Heofb0e7be2010-06-29 10:07:15 +02002228
Tejun Heo974271c42012-07-12 14:46:37 -07002229 /*
Lai Jiangshana489a032014-07-22 13:01:59 +08002230 * Wake up another worker if necessary. The condition is always
2231 * false for normal per-cpu workers since nr_running would always
2232 * be >= 1 at this point. This is used to chain execution of the
2233 * pending work items for WORKER_NOT_RUNNING workers such as the
Lai Jiangshan228f1d02014-07-22 13:02:00 +08002234 * UNBOUND and CPU_INTENSIVE ones.
Tejun Heo974271c42012-07-12 14:46:37 -07002235 */
Lai Jiangshana489a032014-07-22 13:01:59 +08002236 if (need_more_worker(pool))
Tejun Heo63d95a92012-07-12 14:46:37 -07002237 wake_up_worker(pool);
Tejun Heo974271c42012-07-12 14:46:37 -07002238
Tejun Heo8930cab2012-08-03 10:30:45 -07002239 /*
Tejun Heo7c3eed52013-01-24 11:01:33 -08002240 * Record the last pool and clear PENDING which should be the last
Tejun Heod565ed62013-01-24 11:01:33 -08002241 * update to @work. Also, do this inside @pool->lock so that
Tejun Heo23657bb2012-08-13 17:08:19 -07002242 * PENDING and queued state changes happen together while IRQ is
2243 * disabled.
Tejun Heo8930cab2012-08-03 10:30:45 -07002244 */
Tejun Heo7c3eed52013-01-24 11:01:33 -08002245 set_work_pool_and_clear_pending(work, pool->id);
Tejun Heoa62428c2010-06-29 10:07:10 +02002246
Tejun Heod565ed62013-01-24 11:01:33 -08002247 spin_unlock_irq(&pool->lock);
Tejun Heoa62428c2010-06-29 10:07:10 +02002248
Peter Zijlstraa1d14932017-08-23 12:52:32 +02002249 lock_map_acquire(&pwq->wq->lockdep_map);
Tejun Heoa62428c2010-06-29 10:07:10 +02002250 lock_map_acquire(&lockdep_map);
Peter Zijlstrae6f3faa2017-08-23 13:23:30 +02002251 /*
Peter Zijlstraf52be572017-08-29 10:59:39 +02002252 * Strictly speaking we should mark the invariant state without holding
2253 * any locks, that is, before these two lock_map_acquire()'s.
Peter Zijlstrae6f3faa2017-08-23 13:23:30 +02002254 *
2255 * However, that would result in:
2256 *
2257 * A(W1)
2258 * WFC(C)
2259 * A(W1)
2260 * C(C)
2261 *
2262 * Which would create W1->C->W1 dependencies, even though there is no
2263 * actual deadlock possible. There are two solutions, using a
2264 * read-recursive acquire on the work(queue) 'locks', but this will then
Peter Zijlstraf52be572017-08-29 10:59:39 +02002265 * hit the lockdep limitation on recursive locks, or simply discard
Peter Zijlstrae6f3faa2017-08-23 13:23:30 +02002266 * these locks.
2267 *
2268 * AFAICT there is no possible deadlock scenario between the
2269 * flush_work() and complete() primitives (except for single-threaded
2270 * workqueues), so hiding them isn't a problem.
2271 */
Peter Zijlstraf52be572017-08-29 10:59:39 +02002272 lockdep_invariant_state(true);
Arjan van de Vene36c8862010-08-21 13:07:26 -07002273 trace_workqueue_execute_start(work);
Tejun Heoa2c1c572012-12-18 10:35:02 -08002274 worker->current_func(work);
Arjan van de Vene36c8862010-08-21 13:07:26 -07002275 /*
2276 * While we must be careful to not use "work" after this, the trace
2277 * point will only record its address.
2278 */
2279 trace_workqueue_execute_end(work);
Tejun Heoa62428c2010-06-29 10:07:10 +02002280 lock_map_release(&lockdep_map);
Tejun Heo112202d2013-02-13 19:29:12 -08002281 lock_map_release(&pwq->wq->lockdep_map);
Tejun Heoa62428c2010-06-29 10:07:10 +02002282
2283 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
Valentin Ilie044c7822012-08-19 00:52:42 +03002284 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2285 " last function: %pf\n",
Tejun Heoa2c1c572012-12-18 10:35:02 -08002286 current->comm, preempt_count(), task_pid_nr(current),
2287 worker->current_func);
Tejun Heoa62428c2010-06-29 10:07:10 +02002288 debug_show_held_locks(current);
2289 dump_stack();
2290 }
2291
Tejun Heob22ce272013-08-28 17:33:37 -04002292 /*
2293 * The following prevents a kworker from hogging CPU on !PREEMPT
2294 * kernels, where a requeueing work item waiting for something to
2295 * happen could deadlock with stop_machine as such a work item could
2296 * indefinitely requeue itself while all other CPUs are trapped in
Joe Lawrence789cbbe2014-10-05 13:24:21 -04002297 * stop_machine. At the same time, report a quiescent RCU state so
2298 * the same condition doesn't freeze RCU.
Tejun Heob22ce272013-08-28 17:33:37 -04002299 */
Paul E. McKenneya7e64252017-10-24 08:25:02 -07002300 cond_resched();
Tejun Heob22ce272013-08-28 17:33:37 -04002301
Tejun Heod565ed62013-01-24 11:01:33 -08002302 spin_lock_irq(&pool->lock);
Tejun Heoa62428c2010-06-29 10:07:10 +02002303
Tejun Heofb0e7be2010-06-29 10:07:15 +02002304 /* clear cpu intensive status */
2305 if (unlikely(cpu_intensive))
2306 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2307
Johannes Weiner1b69ac62019-02-01 14:20:42 -08002308 /* tag the worker for identification in schedule() */
2309 worker->last_func = worker->current_func;
2310
Tejun Heoa62428c2010-06-29 10:07:10 +02002311 /* we're done with it, release */
Sasha Levin42f85702012-12-17 10:01:23 -05002312 hash_del(&worker->hentry);
Tejun Heoc34056a2010-06-29 10:07:11 +02002313 worker->current_work = NULL;
Tejun Heoa2c1c572012-12-18 10:35:02 -08002314 worker->current_func = NULL;
Tejun Heo112202d2013-02-13 19:29:12 -08002315 worker->current_pwq = NULL;
2316 pwq_dec_nr_in_flight(pwq, work_color);
Tejun Heoa62428c2010-06-29 10:07:10 +02002317}
2318
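/*
 * Illustrative caller-side sketch of the path above: a CPU-bound handler
 * queued on a WQ_CPU_INTENSIVE workqueue so that, as the comments above
 * describe, its execution is excluded from concurrency management.  All
 * example_* names are hypothetical and not part of this file.
 */
struct example_crunch {
        struct work_struct work;
        unsigned long iterations;
};

static void example_crunch_fn(struct work_struct *work)
{
        struct example_crunch *ec = container_of(work, struct example_crunch, work);
        unsigned long i;

        for (i = 0; i < ec->iterations; i++)
                cpu_relax();            /* stand-in for real CPU-heavy processing */
}

static bool example_queue_crunch(struct workqueue_struct *cpu_intensive_wq,
                                 struct example_crunch *ec)
{
        INIT_WORK(&ec->work, example_crunch_fn);
        return queue_work(cpu_intensive_wq, &ec->work);
}
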
Tejun Heoaffee4b2010-06-29 10:07:12 +02002319/**
2320 * process_scheduled_works - process scheduled works
2321 * @worker: self
2322 *
2323 * Process all scheduled works. Please note that the scheduled list
2324 * may change while processing a work, so this function repeatedly
2325 * fetches a work from the top and executes it.
2326 *
2327 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08002328 * spin_lock_irq(pool->lock) which may be released and regrabbed
Tejun Heoaffee4b2010-06-29 10:07:12 +02002329 * multiple times.
2330 */
2331static void process_scheduled_works(struct worker *worker)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332{
Tejun Heoaffee4b2010-06-29 10:07:12 +02002333 while (!list_empty(&worker->scheduled)) {
2334 struct work_struct *work = list_first_entry(&worker->scheduled,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 struct work_struct, entry);
Tejun Heoc34056a2010-06-29 10:07:11 +02002336 process_one_work(worker, work);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338}
2339
Tejun Heo197f6ac2018-05-21 08:04:35 -07002340static void set_pf_worker(bool val)
2341{
2342 mutex_lock(&wq_pool_attach_mutex);
2343 if (val)
2344 current->flags |= PF_WQ_WORKER;
2345 else
2346 current->flags &= ~PF_WQ_WORKER;
2347 mutex_unlock(&wq_pool_attach_mutex);
2348}
2349
Tejun Heo4690c4a2010-06-29 10:07:10 +02002350/**
2351 * worker_thread - the worker thread function
Tejun Heoc34056a2010-06-29 10:07:11 +02002352 * @__worker: self
Tejun Heo4690c4a2010-06-29 10:07:10 +02002353 *
Tejun Heoc5aa87b2013-03-13 16:51:36 -07002354 * The worker thread function. All workers belong to a worker_pool -
2355 * either a per-cpu one or dynamic unbound one. These workers process all
2356 * work items regardless of their specific target workqueue. The only
2357 * exception is work items which belong to workqueues with a rescuer, which
2358 * will be explained in rescuer_thread().
Yacine Belkadid185af32013-07-31 14:59:24 -07002359 *
2360 * Return: 0
Tejun Heo4690c4a2010-06-29 10:07:10 +02002361 */
Tejun Heoc34056a2010-06-29 10:07:11 +02002362static int worker_thread(void *__worker)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363{
Tejun Heoc34056a2010-06-29 10:07:11 +02002364 struct worker *worker = __worker;
Tejun Heobd7bdd42012-07-12 14:46:37 -07002365 struct worker_pool *pool = worker->pool;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366
Tejun Heoe22bee72010-06-29 10:07:14 +02002367 /* tell the scheduler that this is a workqueue worker */
Tejun Heo197f6ac2018-05-21 08:04:35 -07002368 set_pf_worker(true);
Tejun Heoc8e55f32010-06-29 10:07:12 +02002369woke_up:
Tejun Heod565ed62013-01-24 11:01:33 -08002370 spin_lock_irq(&pool->lock);
Oleg Nesterov3af244332007-05-09 02:34:09 -07002371
Tejun Heoa9ab7752013-03-19 13:45:21 -07002372 /* am I supposed to die? */
2373 if (unlikely(worker->flags & WORKER_DIE)) {
Tejun Heod565ed62013-01-24 11:01:33 -08002374 spin_unlock_irq(&pool->lock);
Tejun Heoa9ab7752013-03-19 13:45:21 -07002375 WARN_ON_ONCE(!list_empty(&worker->entry));
Tejun Heo197f6ac2018-05-21 08:04:35 -07002376 set_pf_worker(false);
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08002377
2378 set_task_comm(worker->task, "kworker/dying");
Lai Jiangshan7cda9aa2014-05-20 17:46:32 +08002379 ida_simple_remove(&pool->worker_ida, worker->id);
Tejun Heoa2d812a2018-05-18 08:47:13 -07002380 worker_detach_from_pool(worker);
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08002381 kfree(worker);
Tejun Heoa9ab7752013-03-19 13:45:21 -07002382 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383 }
Oleg Nesterov3af244332007-05-09 02:34:09 -07002384
Tejun Heoc8e55f32010-06-29 10:07:12 +02002385 worker_leave_idle(worker);
Tejun Heodb7bccf2010-06-29 10:07:12 +02002386recheck:
Tejun Heoe22bee72010-06-29 10:07:14 +02002387 /* no more worker necessary? */
Tejun Heo63d95a92012-07-12 14:46:37 -07002388 if (!need_more_worker(pool))
Tejun Heoe22bee72010-06-29 10:07:14 +02002389 goto sleep;
2390
2391 /* do we need to manage? */
Tejun Heo63d95a92012-07-12 14:46:37 -07002392 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
Tejun Heoe22bee72010-06-29 10:07:14 +02002393 goto recheck;
2394
Tejun Heoc8e55f32010-06-29 10:07:12 +02002395 /*
2396 * ->scheduled list can only be filled while a worker is
2397 * preparing to process a work or actually processing it.
2398 * Make sure nobody diddled with it while I was sleeping.
2399 */
Tejun Heo6183c002013-03-12 11:29:57 -07002400 WARN_ON_ONCE(!list_empty(&worker->scheduled));
Tejun Heoc8e55f32010-06-29 10:07:12 +02002401
Tejun Heoe22bee72010-06-29 10:07:14 +02002402 /*
Tejun Heoa9ab7752013-03-19 13:45:21 -07002403 * Finish PREP stage. We're guaranteed to have at least one idle
2404 * worker or that someone else has already assumed the manager
2405 * role. This is where @worker starts participating in concurrency
2406 * management if applicable and concurrency management is restored
2407 * after being rebound. See rebind_workers() for details.
Tejun Heoe22bee72010-06-29 10:07:14 +02002408 */
Tejun Heoa9ab7752013-03-19 13:45:21 -07002409 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
Tejun Heoe22bee72010-06-29 10:07:14 +02002410
2411 do {
Tejun Heoc8e55f32010-06-29 10:07:12 +02002412 struct work_struct *work =
Tejun Heobd7bdd42012-07-12 14:46:37 -07002413 list_first_entry(&pool->worklist,
Tejun Heoc8e55f32010-06-29 10:07:12 +02002414 struct work_struct, entry);
2415
Tejun Heo82607adc2015-12-08 11:28:04 -05002416 pool->watchdog_ts = jiffies;
2417
Tejun Heoc8e55f32010-06-29 10:07:12 +02002418 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2419 /* optimization path, not strictly necessary */
2420 process_one_work(worker, work);
2421 if (unlikely(!list_empty(&worker->scheduled)))
2422 process_scheduled_works(worker);
2423 } else {
2424 move_linked_works(work, &worker->scheduled, NULL);
2425 process_scheduled_works(worker);
2426 }
Tejun Heo63d95a92012-07-12 14:46:37 -07002427 } while (keep_working(pool));
Tejun Heoc8e55f32010-06-29 10:07:12 +02002428
Lai Jiangshan228f1d02014-07-22 13:02:00 +08002429 worker_set_flags(worker, WORKER_PREP);
Tejun Heod313dd82010-07-02 10:03:51 +02002430sleep:
Tejun Heoc8e55f32010-06-29 10:07:12 +02002431 /*
Tejun Heod565ed62013-01-24 11:01:33 -08002432 * pool->lock is held and there's no work to process and no need to
2433 * manage, sleep. Workers are woken up only while holding
2434 * pool->lock or from local cpu, so setting the current state
2435 * before releasing pool->lock is enough to prevent losing any
2436 * event.
Tejun Heoc8e55f32010-06-29 10:07:12 +02002437 */
2438 worker_enter_idle(worker);
Peter Zijlstrac5a94a62017-08-23 13:58:44 +02002439 __set_current_state(TASK_IDLE);
Tejun Heod565ed62013-01-24 11:01:33 -08002440 spin_unlock_irq(&pool->lock);
Tejun Heoc8e55f32010-06-29 10:07:12 +02002441 schedule();
2442 goto woke_up;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443}
2444
Tejun Heoe22bee72010-06-29 10:07:14 +02002445/**
2446 * rescuer_thread - the rescuer thread function
Tejun Heo111c2252013-01-17 17:16:24 -08002447 * @__rescuer: self
Tejun Heoe22bee72010-06-29 10:07:14 +02002448 *
2449 * Workqueue rescuer thread function. There's one rescuer for each
Tejun Heo493008a2013-03-12 11:30:03 -07002450 * workqueue which has WQ_MEM_RECLAIM set.
Tejun Heoe22bee72010-06-29 10:07:14 +02002451 *
Tejun Heo706026c2013-01-24 11:01:34 -08002452 * Regular work processing on a pool may block trying to create a new
Tejun Heoe22bee72010-06-29 10:07:14 +02002453 * worker which uses a GFP_KERNEL allocation which has a slight chance of
2454 * developing into a deadlock if some works currently on the same queue
2455 * need to be processed to satisfy the GFP_KERNEL allocation. This is
2456 * the problem rescuer solves.
2457 *
Tejun Heo706026c2013-01-24 11:01:34 -08002458 * When such a condition is possible, the pool summons rescuers of all
2459 * workqueues which have works queued on the pool and let them process
Tejun Heoe22bee72010-06-29 10:07:14 +02002460 * those works so that forward progress can be guaranteed.
2461 *
2462 * This should happen rarely.
Yacine Belkadid185af32013-07-31 14:59:24 -07002463 *
2464 * Return: 0
Tejun Heoe22bee72010-06-29 10:07:14 +02002465 */
Tejun Heo111c2252013-01-17 17:16:24 -08002466static int rescuer_thread(void *__rescuer)
Tejun Heoe22bee72010-06-29 10:07:14 +02002467{
Tejun Heo111c2252013-01-17 17:16:24 -08002468 struct worker *rescuer = __rescuer;
2469 struct workqueue_struct *wq = rescuer->rescue_wq;
Tejun Heoe22bee72010-06-29 10:07:14 +02002470 struct list_head *scheduled = &rescuer->scheduled;
Lai Jiangshan4d595b82014-04-18 11:04:16 -04002471 bool should_stop;
Tejun Heoe22bee72010-06-29 10:07:14 +02002472
2473 set_user_nice(current, RESCUER_NICE_LEVEL);
Tejun Heo111c2252013-01-17 17:16:24 -08002474
2475 /*
2476 * Mark rescuer as worker too. As WORKER_PREP is never cleared, it
2477 * doesn't participate in concurrency management.
2478 */
Tejun Heo197f6ac2018-05-21 08:04:35 -07002479 set_pf_worker(true);
Tejun Heoe22bee72010-06-29 10:07:14 +02002480repeat:
Peter Zijlstrac5a94a62017-08-23 13:58:44 +02002481 set_current_state(TASK_IDLE);
Tejun Heoe22bee72010-06-29 10:07:14 +02002482
Lai Jiangshan4d595b82014-04-18 11:04:16 -04002483 /*
2484 * By the time the rescuer is requested to stop, the workqueue
2485 * shouldn't have any work pending, but @wq->maydays may still have
2486 * pwq(s) queued. This can happen by non-rescuer workers consuming
2487 * all the work items before the rescuer got to them. Go through
2488 * @wq->maydays processing before acting on should_stop so that the
2489 * list is always empty on exit.
2490 */
2491 should_stop = kthread_should_stop();
Tejun Heoe22bee72010-06-29 10:07:14 +02002492
Tejun Heo493a1722013-03-12 11:29:59 -07002493 /* see whether any pwq is asking for help */
Tejun Heo2e109a22013-03-13 19:47:40 -07002494 spin_lock_irq(&wq_mayday_lock);
Tejun Heo493a1722013-03-12 11:29:59 -07002495
2496 while (!list_empty(&wq->maydays)) {
2497 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2498 struct pool_workqueue, mayday_node);
Tejun Heo112202d2013-02-13 19:29:12 -08002499 struct worker_pool *pool = pwq->pool;
Tejun Heoe22bee72010-06-29 10:07:14 +02002500 struct work_struct *work, *n;
Tejun Heo82607adc2015-12-08 11:28:04 -05002501 bool first = true;
Tejun Heoe22bee72010-06-29 10:07:14 +02002502
2503 __set_current_state(TASK_RUNNING);
Tejun Heo493a1722013-03-12 11:29:59 -07002504 list_del_init(&pwq->mayday_node);
2505
Tejun Heo2e109a22013-03-13 19:47:40 -07002506 spin_unlock_irq(&wq_mayday_lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02002507
Lai Jiangshan51697d392014-05-20 17:46:36 +08002508 worker_attach_to_pool(rescuer, pool);
2509
2510 spin_lock_irq(&pool->lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02002511
2512 /*
2513 * Slurp in all works issued via this workqueue and
2514 * process'em.
2515 */
Tejun Heo0479c8c2014-12-04 10:14:13 -05002516 WARN_ON_ONCE(!list_empty(scheduled));
Tejun Heo82607adc2015-12-08 11:28:04 -05002517 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2518 if (get_work_pwq(work) == pwq) {
2519 if (first)
2520 pool->watchdog_ts = jiffies;
Tejun Heoe22bee72010-06-29 10:07:14 +02002521 move_linked_works(work, scheduled, &n);
Tejun Heo82607adc2015-12-08 11:28:04 -05002522 }
2523 first = false;
2524 }
Tejun Heoe22bee72010-06-29 10:07:14 +02002525
NeilBrown008847f2014-12-08 12:39:16 -05002526 if (!list_empty(scheduled)) {
2527 process_scheduled_works(rescuer);
2528
2529 /*
2530 * The above execution of rescued work items could
2531 * have created more to rescue through
2532 * pwq_activate_first_delayed() or chained
2533 * queueing. Let's put @pwq back on mayday list so
2534 * that such back-to-back work items, which may be
2535 * being used to relieve memory pressure, don't
2536 * incur MAYDAY_INTERVAL delay inbetween.
2537 */
2538 if (need_to_create_worker(pool)) {
2539 spin_lock(&wq_mayday_lock);
2540 get_pwq(pwq);
2541 list_move_tail(&pwq->mayday_node, &wq->maydays);
2542 spin_unlock(&wq_mayday_lock);
2543 }
2544 }
Tejun Heo75769582011-02-14 14:04:46 +01002545
2546 /*
Lai Jiangshan77668c82014-04-18 11:04:16 -04002547 * Put the reference grabbed by send_mayday(). @pool won't
Lai Jiangshan13b1d622014-07-22 13:03:47 +08002548 * go away while we're still attached to it.
Lai Jiangshan77668c82014-04-18 11:04:16 -04002549 */
2550 put_pwq(pwq);
2551
2552 /*
Lai Jiangshand8ca83e2014-07-16 14:56:36 +08002553 * Leave this pool. If need_more_worker() is %true, notify a
Tejun Heo75769582011-02-14 14:04:46 +01002554 * regular worker; otherwise, we end up with 0 concurrency
2555 * and stalling the execution.
2556 */
Lai Jiangshand8ca83e2014-07-16 14:56:36 +08002557 if (need_more_worker(pool))
Tejun Heo63d95a92012-07-12 14:46:37 -07002558 wake_up_worker(pool);
Tejun Heo75769582011-02-14 14:04:46 +01002559
Lai Jiangshan13b1d622014-07-22 13:03:47 +08002560 spin_unlock_irq(&pool->lock);
2561
Tejun Heoa2d812a2018-05-18 08:47:13 -07002562 worker_detach_from_pool(rescuer);
Lai Jiangshan13b1d622014-07-22 13:03:47 +08002563
2564 spin_lock_irq(&wq_mayday_lock);
Tejun Heoe22bee72010-06-29 10:07:14 +02002565 }
2566
Tejun Heo2e109a22013-03-13 19:47:40 -07002567 spin_unlock_irq(&wq_mayday_lock);
Tejun Heo493a1722013-03-12 11:29:59 -07002568
Lai Jiangshan4d595b82014-04-18 11:04:16 -04002569 if (should_stop) {
2570 __set_current_state(TASK_RUNNING);
Tejun Heo197f6ac2018-05-21 08:04:35 -07002571 set_pf_worker(false);
Lai Jiangshan4d595b82014-04-18 11:04:16 -04002572 return 0;
2573 }
2574
Tejun Heo111c2252013-01-17 17:16:24 -08002575 /* rescuers should never participate in concurrency management */
2576 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
Tejun Heoe22bee72010-06-29 10:07:14 +02002577 schedule();
2578 goto repeat;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579}
2580
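/*
 * Allocation sketch for the case the rescuer exists for: a hypothetical
 * workqueue that sits in a memory reclaim path is created with
 * WQ_MEM_RECLAIM so a rescuer thread is guaranteed to back it.  The
 * example_* names are made up for illustration.
 */
static struct workqueue_struct *example_reclaim_wq;

static int __init example_reclaim_path_init(void)
{
        example_reclaim_wq = alloc_workqueue("example_reclaim",
                                             WQ_MEM_RECLAIM, 0);
        if (!example_reclaim_wq)
                return -ENOMEM;
        return 0;
}
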
Tejun Heofca839c2015-12-07 10:58:57 -05002581/**
2582 * check_flush_dependency - check for flush dependency sanity
2583 * @target_wq: workqueue being flushed
2584 * @target_work: work item being flushed (NULL for workqueue flushes)
2585 *
2586 * %current is trying to flush the whole @target_wq or @target_work on it.
2587 * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
2588 * reclaiming memory or running on a workqueue which doesn't have
2589 * %WQ_MEM_RECLAIM as that can break the forward-progress guarantee, leading to
2590 * a deadlock.
2591 */
2592static void check_flush_dependency(struct workqueue_struct *target_wq,
2593 struct work_struct *target_work)
2594{
2595 work_func_t target_func = target_work ? target_work->func : NULL;
2596 struct worker *worker;
2597
2598 if (target_wq->flags & WQ_MEM_RECLAIM)
2599 return;
2600
2601 worker = current_wq_worker();
2602
2603 WARN_ONCE(current->flags & PF_MEMALLOC,
2604 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
2605 current->pid, current->comm, target_wq->name, target_func);
Tejun Heo23d11a52016-01-29 05:59:46 -05002606 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2607 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
Tejun Heofca839c2015-12-07 10:58:57 -05002608 "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
2609 worker->current_pwq->wq->name, worker->current_func,
2610 target_wq->name, target_func);
2611}
2612
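/*
 * Sketch of the dependency the check above warns about, using hypothetical
 * example_* names: a handler running on a WQ_MEM_RECLAIM workqueue flushes
 * a work item that lives on an ordinary workqueue, which has no forward
 * progress guarantee under memory pressure.  example_plain_work is assumed
 * to be initialized and queued on a !WQ_MEM_RECLAIM workqueue elsewhere.
 */
static struct work_struct example_plain_work;

static void example_reclaim_side_fn(struct work_struct *work)
{
        /*
         * Assuming this handler executes on a WQ_MEM_RECLAIM workqueue,
         * this flush is exactly what the WARN_ONCE() above complains about.
         */
        flush_work(&example_plain_work);
}
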
Oleg Nesterovfc2e4d72007-05-09 02:33:51 -07002613struct wq_barrier {
2614 struct work_struct work;
2615 struct completion done;
Tejun Heo2607d7a2015-03-09 09:22:28 -04002616 struct task_struct *task; /* purely informational */
Oleg Nesterovfc2e4d72007-05-09 02:33:51 -07002617};
2618
2619static void wq_barrier_func(struct work_struct *work)
2620{
2621 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2622 complete(&barr->done);
2623}
2624
Tejun Heo4690c4a2010-06-29 10:07:10 +02002625/**
2626 * insert_wq_barrier - insert a barrier work
Tejun Heo112202d2013-02-13 19:29:12 -08002627 * @pwq: pwq to insert barrier into
Tejun Heo4690c4a2010-06-29 10:07:10 +02002628 * @barr: wq_barrier to insert
Tejun Heoaffee4b2010-06-29 10:07:12 +02002629 * @target: target work to attach @barr to
2630 * @worker: worker currently executing @target, NULL if @target is not executing
Tejun Heo4690c4a2010-06-29 10:07:10 +02002631 *
Tejun Heoaffee4b2010-06-29 10:07:12 +02002632 * @barr is linked to @target such that @barr is completed only after
2633 * @target finishes execution. Please note that the ordering
2634 * guarantee is observed only with respect to @target and on the local
2635 * cpu.
2636 *
2637 * Currently, a queued barrier can't be canceled. This is because
2638 * try_to_grab_pending() can't determine whether the work to be
2639 * grabbed is at the head of the queue and thus can't clear the LINKED
2640 * flag of the previous work while there must be a valid next work
2641 * after a work with the LINKED flag set.
2642 *
2643 * Note that when @worker is non-NULL, @target may be modified
Tejun Heo112202d2013-02-13 19:29:12 -08002644 * underneath us, so we can't reliably determine pwq from @target.
Tejun Heo4690c4a2010-06-29 10:07:10 +02002645 *
2646 * CONTEXT:
Tejun Heod565ed62013-01-24 11:01:33 -08002647 * spin_lock_irq(pool->lock).
Tejun Heo4690c4a2010-06-29 10:07:10 +02002648 */
Tejun Heo112202d2013-02-13 19:29:12 -08002649static void insert_wq_barrier(struct pool_workqueue *pwq,
Tejun Heoaffee4b2010-06-29 10:07:12 +02002650 struct wq_barrier *barr,
2651 struct work_struct *target, struct worker *worker)
Oleg Nesterovfc2e4d72007-05-09 02:33:51 -07002652{
Tejun Heoaffee4b2010-06-29 10:07:12 +02002653 struct list_head *head;
2654 unsigned int linked = 0;
2655
Thomas Gleixnerdc186ad2009-11-16 01:09:48 +09002656 /*
Tejun Heod565ed62013-01-24 11:01:33 -08002657 * debugobject calls are safe here even with pool->lock locked
Thomas Gleixnerdc186ad2009-11-16 01:09:48 +09002658 * as we know for sure that this will not trigger any of the
2659 * checks and call back into the fixup functions where we
2660 * might deadlock.
2661 */
Andrew Mortonca1cab32010-10-26 14:22:34 -07002662 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
Tejun Heo22df02b2010-06-29 10:07:10 +02002663 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
Boqun Feng52fa5bc2017-08-17 17:46:12 +08002664
Byungchul Parkfd1a5b02017-10-25 17:56:04 +09002665 init_completion_map(&barr->done, &target->lockdep_map);
2666
Tejun Heo2607d7a2015-03-09 09:22:28 -04002667 barr->task = current;
Oleg Nesterov83c22522007-05-09 02:33:54 -07002668
Tejun Heoaffee4b2010-06-29 10:07:12 +02002669 /*
2670 * If @target is currently being executed, schedule the
2671 * barrier to the worker; otherwise, put it after @target.
2672 */
2673 if (worker)
2674 head = worker->scheduled.next;
2675 else {
2676 unsigned long *bits = work_data_bits(target);
2677
2678 head = target->entry.next;
2679 /* there can already be other linked works, inherit and set */
2680 linked = *bits & WORK_STRUCT_LINKED;
2681 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2682 }
2683
Thomas Gleixnerdc186ad2009-11-16 01:09:48 +09002684 debug_work_activate(&barr->work);
Tejun Heo112202d2013-02-13 19:29:12 -08002685 insert_work(pwq, &barr->work, head,
Tejun Heoaffee4b2010-06-29 10:07:12 +02002686 work_color_to_flags(WORK_NO_COLOR) | linked);
Oleg Nesterovfc2e4d72007-05-09 02:33:51 -07002687}
2688
Tejun Heo73f53c42010-06-29 10:07:11 +02002689/**
Tejun Heo112202d2013-02-13 19:29:12 -08002690 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
Tejun Heo73f53c42010-06-29 10:07:11 +02002691 * @wq: workqueue being flushed
2692 * @flush_color: new flush color, < 0 for no-op
2693 * @work_color: new work color, < 0 for no-op
2694 *
Tejun Heo112202d2013-02-13 19:29:12 -08002695 * Prepare pwqs for workqueue flushing.
Tejun Heo73f53c42010-06-29 10:07:11 +02002696 *
Tejun Heo112202d2013-02-13 19:29:12 -08002697 * If @flush_color is non-negative, flush_color on all pwqs should be
2698 * -1. If no pwq has in-flight commands at the specified color, all
2699 * pwq->flush_color's stay at -1 and %false is returned. If any pwq
2700 * has in flight commands, its pwq->flush_color is set to
2701 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
Tejun Heo73f53c42010-06-29 10:07:11 +02002702 * wakeup logic is armed and %true is returned.
2703 *
2704 * The caller should have initialized @wq->first_flusher prior to
2705 * calling this function with non-negative @flush_color. If
2706 * @flush_color is negative, no flush color update is done and %false
2707 * is returned.
2708 *
Tejun Heo112202d2013-02-13 19:29:12 -08002709 * If @work_color is non-negative, all pwqs should have the same
Tejun Heo73f53c42010-06-29 10:07:11 +02002710 * work_color which is previous to @work_color and all will be
2711 * advanced to @work_color.
2712 *
2713 * CONTEXT:
Lai Jiangshan3c25a552013-03-25 16:57:17 -07002714 * mutex_lock(wq->mutex).
Tejun Heo73f53c42010-06-29 10:07:11 +02002715 *
Yacine Belkadid185af32013-07-31 14:59:24 -07002716 * Return:
Tejun Heo73f53c42010-06-29 10:07:11 +02002717 * %true if @flush_color >= 0 and there's something to flush. %false
2718 * otherwise.
2719 */
Tejun Heo112202d2013-02-13 19:29:12 -08002720static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
Tejun Heo73f53c42010-06-29 10:07:11 +02002721 int flush_color, int work_color)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002722{
Tejun Heo73f53c42010-06-29 10:07:11 +02002723 bool wait = false;
Tejun Heo49e3cf42013-03-12 11:29:58 -07002724 struct pool_workqueue *pwq;
Oleg Nesterov14441962007-05-23 13:57:57 -07002725
Tejun Heo73f53c42010-06-29 10:07:11 +02002726 if (flush_color >= 0) {
Tejun Heo6183c002013-03-12 11:29:57 -07002727 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
Tejun Heo112202d2013-02-13 19:29:12 -08002728 atomic_set(&wq->nr_pwqs_to_flush, 1);
Thomas Gleixnerdc186ad2009-11-16 01:09:48 +09002729 }
Oleg Nesterov14441962007-05-23 13:57:57 -07002730
Tejun Heo49e3cf42013-03-12 11:29:58 -07002731 for_each_pwq(pwq, wq) {
Tejun Heo112202d2013-02-13 19:29:12 -08002732 struct worker_pool *pool = pwq->pool;
Tejun Heo73f53c42010-06-29 10:07:11 +02002733
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07002734 spin_lock_irq(&pool->lock);
Tejun Heo73f53c42010-06-29 10:07:11 +02002735
2736 if (flush_color >= 0) {
Tejun Heo6183c002013-03-12 11:29:57 -07002737 WARN_ON_ONCE(pwq->flush_color != -1);
Tejun Heo73f53c42010-06-29 10:07:11 +02002738
Tejun Heo112202d2013-02-13 19:29:12 -08002739 if (pwq->nr_in_flight[flush_color]) {
2740 pwq->flush_color = flush_color;
2741 atomic_inc(&wq->nr_pwqs_to_flush);
Tejun Heo73f53c42010-06-29 10:07:11 +02002742 wait = true;
2743 }
2744 }
2745
2746 if (work_color >= 0) {
Tejun Heo6183c002013-03-12 11:29:57 -07002747 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
Tejun Heo112202d2013-02-13 19:29:12 -08002748 pwq->work_color = work_color;
Tejun Heo73f53c42010-06-29 10:07:11 +02002749 }
2750
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07002751 spin_unlock_irq(&pool->lock);
Tejun Heo73f53c42010-06-29 10:07:11 +02002752 }
2753
Tejun Heo112202d2013-02-13 19:29:12 -08002754 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
Tejun Heo73f53c42010-06-29 10:07:11 +02002755 complete(&wq->first_flusher->done);
2756
2757 return wait;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758}
2759
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07002760/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761 * flush_workqueue - ensure that any scheduled work has run to completion.
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07002762 * @wq: workqueue to flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763 *
Tejun Heoc5aa87b2013-03-13 16:51:36 -07002764 * This function sleeps until all work items which were queued on entry
2765 * have finished execution, but it is not livelocked by new incoming ones.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002766 */
Harvey Harrison7ad5b3a2008-02-08 04:19:53 -08002767void flush_workqueue(struct workqueue_struct *wq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768{
Tejun Heo73f53c42010-06-29 10:07:11 +02002769 struct wq_flusher this_flusher = {
2770 .list = LIST_HEAD_INIT(this_flusher.list),
2771 .flush_color = -1,
Byungchul Parkfd1a5b02017-10-25 17:56:04 +09002772 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
Tejun Heo73f53c42010-06-29 10:07:11 +02002773 };
2774 int next_color;
Oleg Nesterovb1f4ec172007-05-09 02:34:12 -07002775
Tejun Heo3347fa02016-09-16 15:49:32 -04002776 if (WARN_ON(!wq_online))
2777 return;
2778
Johannes Berg87915ad2018-08-22 11:49:04 +02002779 lock_map_acquire(&wq->lockdep_map);
2780 lock_map_release(&wq->lockdep_map);
2781
Lai Jiangshan3c25a552013-03-25 16:57:17 -07002782 mutex_lock(&wq->mutex);
Tejun Heo73f53c42010-06-29 10:07:11 +02002783
2784 /*
2785 * Start-to-wait phase
2786 */
2787 next_color = work_next_color(wq->work_color);
2788
2789 if (next_color != wq->flush_color) {
2790 /*
2791 * Color space is not full. The current work_color
2792 * becomes our flush_color and work_color is advanced
2793 * by one.
2794 */
Tejun Heo6183c002013-03-12 11:29:57 -07002795 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
Tejun Heo73f53c42010-06-29 10:07:11 +02002796 this_flusher.flush_color = wq->work_color;
2797 wq->work_color = next_color;
2798
2799 if (!wq->first_flusher) {
2800 /* no flush in progress, become the first flusher */
Tejun Heo6183c002013-03-12 11:29:57 -07002801 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002802
2803 wq->first_flusher = &this_flusher;
2804
Tejun Heo112202d2013-02-13 19:29:12 -08002805 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
Tejun Heo73f53c42010-06-29 10:07:11 +02002806 wq->work_color)) {
2807 /* nothing to flush, done */
2808 wq->flush_color = next_color;
2809 wq->first_flusher = NULL;
2810 goto out_unlock;
2811 }
2812 } else {
2813 /* wait in queue */
Tejun Heo6183c002013-03-12 11:29:57 -07002814 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002815 list_add_tail(&this_flusher.list, &wq->flusher_queue);
Tejun Heo112202d2013-02-13 19:29:12 -08002816 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002817 }
2818 } else {
2819 /*
2820 * Oops, color space is full, wait on overflow queue.
2821 * The next flush completion will assign us
2822 * flush_color and transfer to flusher_queue.
2823 */
2824 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2825 }
2826
Tejun Heofca839c2015-12-07 10:58:57 -05002827 check_flush_dependency(wq, NULL);
2828
Lai Jiangshan3c25a552013-03-25 16:57:17 -07002829 mutex_unlock(&wq->mutex);
Tejun Heo73f53c42010-06-29 10:07:11 +02002830
2831 wait_for_completion(&this_flusher.done);
2832
2833 /*
2834 * Wake-up-and-cascade phase
2835 *
2836 * First flushers are responsible for cascading flushes and
2837 * handling overflow. Non-first flushers can simply return.
2838 */
2839 if (wq->first_flusher != &this_flusher)
2840 return;
2841
Lai Jiangshan3c25a552013-03-25 16:57:17 -07002842 mutex_lock(&wq->mutex);
Tejun Heo73f53c42010-06-29 10:07:11 +02002843
Tejun Heo4ce48b32010-07-02 10:03:51 +02002844 /* we might have raced, check again with mutex held */
2845 if (wq->first_flusher != &this_flusher)
2846 goto out_unlock;
2847
Tejun Heo73f53c42010-06-29 10:07:11 +02002848 wq->first_flusher = NULL;
2849
Tejun Heo6183c002013-03-12 11:29:57 -07002850 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2851 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002852
2853 while (true) {
2854 struct wq_flusher *next, *tmp;
2855
2856 /* complete all the flushers sharing the current flush color */
2857 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2858 if (next->flush_color != wq->flush_color)
2859 break;
2860 list_del_init(&next->list);
2861 complete(&next->done);
2862 }
2863
Tejun Heo6183c002013-03-12 11:29:57 -07002864 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2865 wq->flush_color != work_next_color(wq->work_color));
Tejun Heo73f53c42010-06-29 10:07:11 +02002866
2867 /* this flush_color is finished, advance by one */
2868 wq->flush_color = work_next_color(wq->flush_color);
2869
2870 /* one color has been freed, handle overflow queue */
2871 if (!list_empty(&wq->flusher_overflow)) {
2872 /*
2873 * Assign the same color to all overflowed
2874 * flushers, advance work_color and append to
2875 * flusher_queue. This is the start-to-wait
2876 * phase for these overflowed flushers.
2877 */
2878 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2879 tmp->flush_color = wq->work_color;
2880
2881 wq->work_color = work_next_color(wq->work_color);
2882
2883 list_splice_tail_init(&wq->flusher_overflow,
2884 &wq->flusher_queue);
Tejun Heo112202d2013-02-13 19:29:12 -08002885 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002886 }
2887
2888 if (list_empty(&wq->flusher_queue)) {
Tejun Heo6183c002013-03-12 11:29:57 -07002889 WARN_ON_ONCE(wq->flush_color != wq->work_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002890 break;
2891 }
2892
2893 /*
2894 * Need to flush more colors. Make the next flusher
Tejun Heo112202d2013-02-13 19:29:12 -08002895 * the new first flusher and arm pwqs.
Tejun Heo73f53c42010-06-29 10:07:11 +02002896 */
Tejun Heo6183c002013-03-12 11:29:57 -07002897 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2898 WARN_ON_ONCE(wq->flush_color != next->flush_color);
Tejun Heo73f53c42010-06-29 10:07:11 +02002899
2900 list_del_init(&next->list);
2901 wq->first_flusher = next;
2902
Tejun Heo112202d2013-02-13 19:29:12 -08002903 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
Tejun Heo73f53c42010-06-29 10:07:11 +02002904 break;
2905
2906 /*
2907 * Meh... this color is already done, clear first
2908 * flusher and repeat cascading.
2909 */
2910 wq->first_flusher = NULL;
2911 }
2912
2913out_unlock:
Lai Jiangshan3c25a552013-03-25 16:57:17 -07002914 mutex_unlock(&wq->mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002915}
Tim Gardner1dadafa2015-08-04 11:26:04 -06002916EXPORT_SYMBOL(flush_workqueue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002917
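/*
 * Minimal usage sketch for flush_workqueue() with hypothetical example_*
 * names: queue a batch of items on a private workqueue and wait for
 * everything queued so far to finish before returning.
 */
static void example_run_batch_sync(struct workqueue_struct *example_wq,
                                   struct work_struct *items, int nr)
{
        int i;

        for (i = 0; i < nr; i++)
                queue_work(example_wq, &items[i]);

        /* returns only after every item queued above has finished */
        flush_workqueue(example_wq);
}
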
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002918/**
2919 * drain_workqueue - drain a workqueue
2920 * @wq: workqueue to drain
2921 *
2922 * Wait until the workqueue becomes empty. While draining is in progress,
2923 * only chain queueing is allowed. IOW, only currently pending or running
2924 * work items on @wq can queue further work items on it. @wq is flushed
Chen Hanxiaob749b1b2015-05-13 06:10:05 -04002925 * repeatedly until it becomes empty. The number of flushes is determined
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002926 * by the depth of chaining and should be relatively short. Whine if it
2927 * takes too long.
2928 */
2929void drain_workqueue(struct workqueue_struct *wq)
2930{
2931 unsigned int flush_cnt = 0;
Tejun Heo49e3cf42013-03-12 11:29:58 -07002932 struct pool_workqueue *pwq;
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002933
2934 /*
2935 * __queue_work() needs to test whether there are drainers, is much
2936 * hotter than drain_workqueue() and already looks at @wq->flags.
Tejun Heo618b01e2013-03-12 11:30:04 -07002937 * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers.
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002938 */
Lai Jiangshan87fc7412013-03-25 16:57:18 -07002939 mutex_lock(&wq->mutex);
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002940 if (!wq->nr_drainers++)
Tejun Heo618b01e2013-03-12 11:30:04 -07002941 wq->flags |= __WQ_DRAINING;
Lai Jiangshan87fc7412013-03-25 16:57:18 -07002942 mutex_unlock(&wq->mutex);
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002943reflush:
2944 flush_workqueue(wq);
2945
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07002946 mutex_lock(&wq->mutex);
Tejun Heo76af4d92013-03-12 11:30:00 -07002947
Tejun Heo49e3cf42013-03-12 11:29:58 -07002948 for_each_pwq(pwq, wq) {
Thomas Tuttlefa2563e2011-09-14 16:22:28 -07002949 bool drained;
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002950
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07002951 spin_lock_irq(&pwq->pool->lock);
Tejun Heo112202d2013-02-13 19:29:12 -08002952 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07002953 spin_unlock_irq(&pwq->pool->lock);
Thomas Tuttlefa2563e2011-09-14 16:22:28 -07002954
2955 if (drained)
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002956 continue;
2957
2958 if (++flush_cnt == 10 ||
2959 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
Tejun Heoc5aa87b2013-03-13 16:51:36 -07002960 pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
Valentin Ilie044c7822012-08-19 00:52:42 +03002961 wq->name, flush_cnt);
Tejun Heo76af4d92013-03-12 11:30:00 -07002962
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07002963 mutex_unlock(&wq->mutex);
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002964 goto reflush;
2965 }
2966
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002967 if (!--wq->nr_drainers)
Tejun Heo618b01e2013-03-12 11:30:04 -07002968 wq->flags &= ~__WQ_DRAINING;
Lai Jiangshan87fc7412013-03-25 16:57:18 -07002969 mutex_unlock(&wq->mutex);
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02002970}
2971EXPORT_SYMBOL_GPL(drain_workqueue);
2972
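/*
 * Teardown sketch for drain_workqueue() with hypothetical example_* names:
 * self-requeueing work items are told to stop, the workqueue is drained so
 * chained requeues can finish, and only then is it destroyed.
 */
static void example_shutdown(struct workqueue_struct *example_wq,
                             atomic_t *example_stop_flag)
{
        atomic_set(example_stop_flag, 1);       /* handlers check this and stop requeueing */
        drain_workqueue(example_wq);            /* only chain queueing is allowed meanwhile */
        destroy_workqueue(example_wq);
}
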
Johannes Bergd6e89782018-08-22 11:49:03 +02002973static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
2974 bool from_cancel)
Tejun Heobaf59022010-09-16 10:42:16 +02002975{
2976 struct worker *worker = NULL;
Tejun Heoc9e7cf22013-01-24 11:01:33 -08002977 struct worker_pool *pool;
Tejun Heo112202d2013-02-13 19:29:12 -08002978 struct pool_workqueue *pwq;
Tejun Heobaf59022010-09-16 10:42:16 +02002979
2980 might_sleep();
Tejun Heobaf59022010-09-16 10:42:16 +02002981
Thomas Gleixner24acfb72019-03-13 17:55:47 +01002982 rcu_read_lock();
Tejun Heofa1b54e2013-03-12 11:30:00 -07002983 pool = get_work_pool(work);
2984 if (!pool) {
Thomas Gleixner24acfb72019-03-13 17:55:47 +01002985 rcu_read_unlock();
Tejun Heofa1b54e2013-03-12 11:30:00 -07002986 return false;
2987 }
2988
Thomas Gleixner24acfb72019-03-13 17:55:47 +01002989 spin_lock_irq(&pool->lock);
Lai Jiangshan0b3dae62013-02-06 18:04:53 -08002990 /* see the comment in try_to_grab_pending() with the same code */
Tejun Heo112202d2013-02-13 19:29:12 -08002991 pwq = get_work_pwq(work);
2992 if (pwq) {
2993 if (unlikely(pwq->pool != pool))
Tejun Heobaf59022010-09-16 10:42:16 +02002994 goto already_gone;
Tejun Heo606a5022012-08-20 14:51:23 -07002995 } else {
Tejun Heoc9e7cf22013-01-24 11:01:33 -08002996 worker = find_worker_executing_work(pool, work);
Tejun Heobaf59022010-09-16 10:42:16 +02002997 if (!worker)
2998 goto already_gone;
Tejun Heo112202d2013-02-13 19:29:12 -08002999 pwq = worker->current_pwq;
Tejun Heo606a5022012-08-20 14:51:23 -07003000 }
Tejun Heobaf59022010-09-16 10:42:16 +02003001
Tejun Heofca839c2015-12-07 10:58:57 -05003002 check_flush_dependency(pwq->wq, work);
3003
Tejun Heo112202d2013-02-13 19:29:12 -08003004 insert_wq_barrier(pwq, barr, work, worker);
Tejun Heod565ed62013-01-24 11:01:33 -08003005 spin_unlock_irq(&pool->lock);
Tejun Heobaf59022010-09-16 10:42:16 +02003006
Tejun Heoe159489b2011-01-09 23:32:15 +01003007 /*
Peter Zijlstraa1d14932017-08-23 12:52:32 +02003008 * Force a lock recursion deadlock when using flush_work() inside a
3009 * single-threaded or rescuer equipped workqueue.
3010 *
3011 * For single threaded workqueues the deadlock happens when the work
3012 * is after the work issuing the flush_work(). For rescuer equipped
3013 * workqueues the deadlock happens when the rescuer stalls, blocking
3014 * forward progress.
Tejun Heoe159489b2011-01-09 23:32:15 +01003015 */
Johannes Bergd6e89782018-08-22 11:49:03 +02003016 if (!from_cancel &&
3017 (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
Tejun Heo112202d2013-02-13 19:29:12 -08003018 lock_map_acquire(&pwq->wq->lockdep_map);
Peter Zijlstraa1d14932017-08-23 12:52:32 +02003019 lock_map_release(&pwq->wq->lockdep_map);
3020 }
Thomas Gleixner24acfb72019-03-13 17:55:47 +01003021 rcu_read_unlock();
Tejun Heobaf59022010-09-16 10:42:16 +02003022 return true;
3023already_gone:
Tejun Heod565ed62013-01-24 11:01:33 -08003024 spin_unlock_irq(&pool->lock);
Thomas Gleixner24acfb72019-03-13 17:55:47 +01003025 rcu_read_unlock();
Tejun Heobaf59022010-09-16 10:42:16 +02003026 return false;
3027}
3028
Johannes Bergd6e89782018-08-22 11:49:03 +02003029static bool __flush_work(struct work_struct *work, bool from_cancel)
3030{
3031 struct wq_barrier barr;
3032
3033 if (WARN_ON(!wq_online))
3034 return false;
3035
Tetsuo Handa4d43d392019-01-23 09:44:12 +09003036 if (WARN_ON(!work->func))
3037 return false;
3038
Johannes Berg87915ad2018-08-22 11:49:04 +02003039 if (!from_cancel) {
3040 lock_map_acquire(&work->lockdep_map);
3041 lock_map_release(&work->lockdep_map);
3042 }
3043
Johannes Bergd6e89782018-08-22 11:49:03 +02003044 if (start_flush_work(work, &barr, from_cancel)) {
3045 wait_for_completion(&barr.done);
3046 destroy_work_on_stack(&barr.work);
3047 return true;
3048 } else {
3049 return false;
3050 }
3051}
3052
Oleg Nesterovdb700892008-07-25 01:47:49 -07003053/**
Tejun Heo401a8d02010-09-16 10:36:00 +02003054 * flush_work - wait for a work to finish executing the last queueing instance
3055 * @work: the work to flush
Oleg Nesterovdb700892008-07-25 01:47:49 -07003056 *
Tejun Heo606a5022012-08-20 14:51:23 -07003057 * Wait until @work has finished execution. @work is guaranteed to be idle
3058 * on return if it hasn't been requeued since flush started.
Tejun Heo401a8d02010-09-16 10:36:00 +02003059 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003060 * Return:
Tejun Heo401a8d02010-09-16 10:36:00 +02003061 * %true if flush_work() waited for the work to finish execution,
3062 * %false if it was already idle.
Oleg Nesterovdb700892008-07-25 01:47:49 -07003063 */
Tejun Heo401a8d02010-09-16 10:36:00 +02003064bool flush_work(struct work_struct *work)
Oleg Nesterovdb700892008-07-25 01:47:49 -07003065{
Johannes Bergd6e89782018-08-22 11:49:03 +02003066 return __flush_work(work, false);
Oleg Nesterovdb700892008-07-25 01:47:49 -07003067}
3068EXPORT_SYMBOL_GPL(flush_work);
3069
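/*
 * Usage sketch for flush_work() with a hypothetical result-carrying work
 * item: the caller waits for the last queueing of the item to finish
 * before trusting the result field.
 */
struct example_probe {
        struct work_struct work;
        int result;
};

static int example_probe_result(struct example_probe *p)
{
        flush_work(&p->work);   /* p->work's handler has completed on return */
        return p->result;
}
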
Tejun Heo8603e1b32015-03-05 08:04:13 -05003070struct cwt_wait {
Ingo Molnarac6424b2017-06-20 12:06:13 +02003071 wait_queue_entry_t wait;
Tejun Heo8603e1b32015-03-05 08:04:13 -05003072 struct work_struct *work;
3073};
3074
Ingo Molnarac6424b2017-06-20 12:06:13 +02003075static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
Tejun Heo8603e1b32015-03-05 08:04:13 -05003076{
3077 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
3078
3079 if (cwait->work != key)
3080 return 0;
3081 return autoremove_wake_function(wait, mode, sync, key);
3082}
3083
Tejun Heo36e227d2012-08-03 10:30:46 -07003084static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
Tejun Heo401a8d02010-09-16 10:36:00 +02003085{
Tejun Heo8603e1b32015-03-05 08:04:13 -05003086 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
Tejun Heobbb68df2012-08-03 10:30:46 -07003087 unsigned long flags;
Oleg Nesterov1f1f6422007-07-15 23:41:44 -07003088 int ret;
3089
3090 do {
Tejun Heobbb68df2012-08-03 10:30:46 -07003091 ret = try_to_grab_pending(work, is_dwork, &flags);
3092 /*
Tejun Heo8603e1b32015-03-05 08:04:13 -05003093 * If someone else is already canceling, wait for it to
3094 * finish. flush_work() doesn't work for PREEMPT_NONE
3095 * because we may get scheduled between @work's completion
3096 * and the other canceling task resuming and clearing
3097 * CANCELING - flush_work() will return false immediately
3098 * as @work is no longer busy, try_to_grab_pending() will
3099 * return -ENOENT as @work is still being canceled and the
3100 * other canceling task won't be able to clear CANCELING as
3101 * we're hogging the CPU.
3102 *
3103 * Let's wait for completion using a waitqueue. As this
3104 * may lead to the thundering herd problem, use a custom
3105 * wake function which matches @work along with exclusive
3106 * wait and wakeup.
Tejun Heobbb68df2012-08-03 10:30:46 -07003107 */
Tejun Heo8603e1b32015-03-05 08:04:13 -05003108 if (unlikely(ret == -ENOENT)) {
3109 struct cwt_wait cwait;
3110
3111 init_wait(&cwait.wait);
3112 cwait.wait.func = cwt_wakefn;
3113 cwait.work = work;
3114
3115 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
3116 TASK_UNINTERRUPTIBLE);
3117 if (work_is_canceling(work))
3118 schedule();
3119 finish_wait(&cancel_waitq, &cwait.wait);
3120 }
Oleg Nesterov1f1f6422007-07-15 23:41:44 -07003121 } while (unlikely(ret < 0));
3122
Tejun Heobbb68df2012-08-03 10:30:46 -07003123 /* tell other tasks trying to grab @work to back off */
3124 mark_work_canceling(work);
3125 local_irq_restore(flags);
3126
Tejun Heo3347fa02016-09-16 15:49:32 -04003127 /*
3128 * This allows canceling during early boot. We know that @work
3129 * isn't executing.
3130 */
3131 if (wq_online)
Johannes Bergd6e89782018-08-22 11:49:03 +02003132 __flush_work(work, true);
Tejun Heo3347fa02016-09-16 15:49:32 -04003133
Tejun Heo7a22ad72010-06-29 10:07:13 +02003134 clear_work_data(work);
Tejun Heo8603e1b32015-03-05 08:04:13 -05003135
3136 /*
3137 * Paired with prepare_to_wait() above so that either
3138 * waitqueue_active() is visible here or !work_is_canceling() is
3139 * visible there.
3140 */
3141 smp_mb();
3142 if (waitqueue_active(&cancel_waitq))
3143 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
3144
Oleg Nesterov1f1f6422007-07-15 23:41:44 -07003145 return ret;
3146}
3147
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003148/**
Tejun Heo401a8d02010-09-16 10:36:00 +02003149 * cancel_work_sync - cancel a work and wait for it to finish
3150 * @work: the work to cancel
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003151 *
Tejun Heo401a8d02010-09-16 10:36:00 +02003152 * Cancel @work and wait for its execution to finish. This function
3153 * can be used even if the work re-queues itself or migrates to
3154 * another workqueue. On return from this function, @work is
3155 * guaranteed to be not pending or executing on any CPU.
Oleg Nesterov1f1f6422007-07-15 23:41:44 -07003156 *
Tejun Heo401a8d02010-09-16 10:36:00 +02003157 * cancel_work_sync(&delayed_work->work) must not be used for
3158 * delayed_work's. Use cancel_delayed_work_sync() instead.
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003159 *
Tejun Heo401a8d02010-09-16 10:36:00 +02003160 * The caller must ensure that the workqueue on which @work was last
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003161 * queued can't be destroyed before this function returns.
Tejun Heo401a8d02010-09-16 10:36:00 +02003162 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003163 * Return:
Tejun Heo401a8d02010-09-16 10:36:00 +02003164 * %true if @work was pending, %false otherwise.
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003165 */
Tejun Heo401a8d02010-09-16 10:36:00 +02003166bool cancel_work_sync(struct work_struct *work)
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003167{
Tejun Heo36e227d2012-08-03 10:30:46 -07003168 return __cancel_work_timer(work, false);
Oleg Nesterovb89deed2007-05-09 02:33:52 -07003169}
Oleg Nesterov28e53bd2007-05-09 02:34:22 -07003170EXPORT_SYMBOL_GPL(cancel_work_sync);
Oleg Nesterovb89deed2007-05-09 02:33:52 -07003171
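/*
 * Teardown sketch for cancel_work_sync(): a hypothetical driver makes sure
 * its work item is neither pending nor running before freeing the
 * structure that embeds it, even if the handler re-queues itself.
 */
struct example_device {
        struct work_struct event_work;
};

static void example_device_remove(struct example_device *dev)
{
        cancel_work_sync(&dev->event_work);
        kfree(dev);
}
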
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003172/**
Tejun Heo401a8d02010-09-16 10:36:00 +02003173 * flush_delayed_work - wait for a dwork to finish executing the last queueing
3174 * @dwork: the delayed work to flush
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003175 *
Tejun Heo401a8d02010-09-16 10:36:00 +02003176 * The delayed timer is cancelled and the pending work is queued for
3177 * immediate execution. Like flush_work(), this function only
3178 * considers the last queueing instance of @dwork.
Oleg Nesterov1f1f6422007-07-15 23:41:44 -07003179 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003180 * Return:
Tejun Heo401a8d02010-09-16 10:36:00 +02003181 * %true if flush_work() waited for the work to finish execution,
3182 * %false if it was already idle.
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003183 */
Tejun Heo401a8d02010-09-16 10:36:00 +02003184bool flush_delayed_work(struct delayed_work *dwork)
3185{
Tejun Heo8930cab2012-08-03 10:30:45 -07003186 local_irq_disable();
Tejun Heo401a8d02010-09-16 10:36:00 +02003187 if (del_timer_sync(&dwork->timer))
Lai Jiangshan60c057b2013-02-06 18:04:53 -08003188 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
Tejun Heo8930cab2012-08-03 10:30:45 -07003189 local_irq_enable();
Tejun Heo401a8d02010-09-16 10:36:00 +02003190 return flush_work(&dwork->work);
3191}
3192EXPORT_SYMBOL(flush_delayed_work);
3193
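/*
 * Sketch for flush_delayed_work() with hypothetical example_* names: a
 * periodic delayed work, assumed to have been set up elsewhere with
 * INIT_DELAYED_WORK(), is forced to run now instead of waiting out its
 * timer, e.g. right before statistics are reported.
 */
static struct delayed_work example_stats_dwork;

static void example_report_stats_now(void)
{
        /* cancels the timer, queues the work immediately and waits for it */
        flush_delayed_work(&example_stats_dwork);
}
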
Tejun Heo05f0fe62018-03-14 12:45:13 -07003194/**
3195 * flush_rcu_work - wait for a rwork to finish executing the last queueing
3196 * @rwork: the rcu work to flush
3197 *
3198 * Return:
3199 * %true if flush_rcu_work() waited for the work to finish execution,
3200 * %false if it was already idle.
3201 */
3202bool flush_rcu_work(struct rcu_work *rwork)
3203{
3204 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3205 rcu_barrier();
3206 flush_work(&rwork->work);
3207 return true;
3208 } else {
3209 return flush_work(&rwork->work);
3210 }
3211}
3212EXPORT_SYMBOL(flush_rcu_work);
3213
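/*
 * Sketch of the rcu_work pattern with hypothetical example_* names: the
 * item is queued with queue_rcu_work() so it runs only after an RCU grace
 * period, and flush_rcu_work() waits for both the grace period and the
 * handler to finish.
 */
static struct rcu_work example_rwork;
static void *example_stale_buf;

static void example_free_stale(struct work_struct *work)
{
        kfree(example_stale_buf);       /* no pre-existing RCU reader can still see it */
        example_stale_buf = NULL;
}

static void example_retire_stale_buf(void)
{
        INIT_RCU_WORK(&example_rwork, example_free_stale);
        queue_rcu_work(system_wq, &example_rwork);
        flush_rcu_work(&example_rwork); /* grace period + execution both done */
}
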
Jens Axboef72b8792016-08-24 15:51:50 -06003214static bool __cancel_work(struct work_struct *work, bool is_dwork)
3215{
3216 unsigned long flags;
3217 int ret;
3218
3219 do {
3220 ret = try_to_grab_pending(work, is_dwork, &flags);
3221 } while (unlikely(ret == -EAGAIN));
3222
3223 if (unlikely(ret < 0))
3224 return false;
3225
3226 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3227 local_irq_restore(flags);
3228 return ret;
3229}
3230
Tejun Heo401a8d02010-09-16 10:36:00 +02003231/**
Tejun Heo57b30ae2012-08-21 13:18:24 -07003232 * cancel_delayed_work - cancel a delayed work
3233 * @dwork: delayed_work to cancel
Tejun Heo09383492010-09-16 10:48:29 +02003234 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003235 * Kill off a pending delayed_work.
3236 *
3237 * Return: %true if @dwork was pending and canceled; %false if it wasn't
3238 * pending.
3239 *
3240 * Note:
3241 * The work callback function may still be running on return, unless
3242 * it returns %true and the work doesn't re-arm itself. Explicitly flush or
3243 * use cancel_delayed_work_sync() to wait on it.
Tejun Heo09383492010-09-16 10:48:29 +02003244 *
Tejun Heo57b30ae2012-08-21 13:18:24 -07003245 * This function is safe to call from any context including IRQ handler.
Tejun Heo09383492010-09-16 10:48:29 +02003246 */
Tejun Heo57b30ae2012-08-21 13:18:24 -07003247bool cancel_delayed_work(struct delayed_work *dwork)
Tejun Heo09383492010-09-16 10:48:29 +02003248{
Jens Axboef72b8792016-08-24 15:51:50 -06003249 return __cancel_work(&dwork->work, true);
Tejun Heo09383492010-09-16 10:48:29 +02003250}
Tejun Heo57b30ae2012-08-21 13:18:24 -07003251EXPORT_SYMBOL(cancel_delayed_work);
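
/*
 * Example (illustrative only, not part of this file): because
 * cancel_delayed_work() may be called from IRQ context, it is a common
 * way to retract a pending timeout once the awaited event arrives.  The
 * my_dev names are hypothetical; mod_delayed_work() is the alternative
 * when the timeout should be pushed back rather than dropped.
 *
 *	struct my_dev {
 *		struct delayed_work timeout_work;
 *		unsigned int timeouts_avoided;
 *	};
 *
 *	static irqreturn_t my_done_irq(int irq, void *data)
 *	{
 *		struct my_dev *dev = data;
 *
 *		if (cancel_delayed_work(&dev->timeout_work))
 *			dev->timeouts_avoided++;
 *		return IRQ_HANDLED;
 *	}
 */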
Tejun Heo09383492010-09-16 10:48:29 +02003252
3253/**
Tejun Heo401a8d02010-09-16 10:36:00 +02003254 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
 3255 * @dwork: the delayed work to cancel
3256 *
3257 * This is cancel_work_sync() for delayed works.
3258 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003259 * Return:
Tejun Heo401a8d02010-09-16 10:36:00 +02003260 * %true if @dwork was pending, %false otherwise.
3261 */
3262bool cancel_delayed_work_sync(struct delayed_work *dwork)
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003263{
Tejun Heo36e227d2012-08-03 10:30:46 -07003264 return __cancel_work_timer(&dwork->work, true);
Oleg Nesterov6e84d642007-05-09 02:34:46 -07003265}
Oleg Nesterovf5a421a2007-07-15 23:41:44 -07003266EXPORT_SYMBOL(cancel_delayed_work_sync);
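
/*
 * Example (illustrative only, not part of this file): the sync variant is
 * the one to use on teardown paths that must guarantee the callback has
 * finished, e.g. before freeing the object embedding the delayed_work.
 * The my_dev names (including poll_work) are hypothetical.
 *
 *	static void my_dev_remove(struct my_dev *dev)
 *	{
 *		cancel_delayed_work_sync(&dev->poll_work);
 *		kfree(dev);
 *	}
 */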
Linus Torvalds1da177e2005-04-16 15:20:36 -07003267
Rolf Eike Beer0fcb78c2006-07-30 03:03:42 -07003268/**
Tejun Heo31ddd872010-10-19 11:14:49 +02003269 * schedule_on_each_cpu - execute a function synchronously on each online CPU
Andrew Mortonb6136772006-06-25 05:47:49 -07003270 * @func: the function to call
Andrew Mortonb6136772006-06-25 05:47:49 -07003271 *
Tejun Heo31ddd872010-10-19 11:14:49 +02003272 * schedule_on_each_cpu() executes @func on each online CPU using the
3273 * system workqueue and blocks until all CPUs have completed.
Andrew Mortonb6136772006-06-25 05:47:49 -07003274 * schedule_on_each_cpu() is very slow.
Tejun Heo31ddd872010-10-19 11:14:49 +02003275 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003276 * Return:
Tejun Heo31ddd872010-10-19 11:14:49 +02003277 * 0 on success, -errno on failure.
Andrew Mortonb6136772006-06-25 05:47:49 -07003278 */
David Howells65f27f32006-11-22 14:55:48 +00003279int schedule_on_each_cpu(work_func_t func)
Christoph Lameter15316ba82006-01-08 01:00:43 -08003280{
3281 int cpu;
Namhyung Kim38f51562010-08-08 14:24:09 +02003282 struct work_struct __percpu *works;
Christoph Lameter15316ba82006-01-08 01:00:43 -08003283
Andrew Mortonb6136772006-06-25 05:47:49 -07003284 works = alloc_percpu(struct work_struct);
3285 if (!works)
Christoph Lameter15316ba82006-01-08 01:00:43 -08003286 return -ENOMEM;
Andrew Mortonb6136772006-06-25 05:47:49 -07003287
Gautham R Shenoy95402b32008-01-25 21:08:02 +01003288 get_online_cpus();
Tejun Heo93981802009-11-17 14:06:20 -08003289
Christoph Lameter15316ba82006-01-08 01:00:43 -08003290 for_each_online_cpu(cpu) {
Ingo Molnar9bfb1832006-12-18 20:05:09 +01003291 struct work_struct *work = per_cpu_ptr(works, cpu);
3292
3293 INIT_WORK(work, func);
Tejun Heob71ab8c2010-06-29 10:07:14 +02003294 schedule_work_on(cpu, work);
Andi Kleen65a64462009-10-14 06:22:47 +02003295 }
Tejun Heo93981802009-11-17 14:06:20 -08003296
3297 for_each_online_cpu(cpu)
3298 flush_work(per_cpu_ptr(works, cpu));
3299
Gautham R Shenoy95402b32008-01-25 21:08:02 +01003300 put_online_cpus();
Andrew Mortonb6136772006-06-25 05:47:49 -07003301 free_percpu(works);
Christoph Lameter15316ba82006-01-08 01:00:43 -08003302 return 0;
3303}
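
/*
 * Example (illustrative only, not part of this file): resetting a per-cpu
 * counter from process context on every online CPU.  The my_* names are
 * hypothetical.
 *
 *	static DEFINE_PER_CPU(unsigned long, my_hits);
 *
 *	static void my_reset_hits(struct work_struct *work)
 *	{
 *		this_cpu_write(my_hits, 0);
 *	}
 *
 *	static int my_reset_all_cpus(void)
 *	{
 *		return schedule_on_each_cpu(my_reset_hits);
 *	}
 */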
3304
Alan Sterneef6a7d2010-02-12 17:39:21 +09003305/**
James Bottomley1fa44ec2006-02-23 12:43:43 -06003306 * execute_in_process_context - reliably execute the routine with user context
3307 * @fn: the function to execute
James Bottomley1fa44ec2006-02-23 12:43:43 -06003308 * @ew: guaranteed storage for the execute work structure (must
3309 * be available when the work executes)
3310 *
3311 * Executes the function immediately if process context is available,
3312 * otherwise schedules the function for delayed execution.
3313 *
Yacine Belkadid185af32013-07-31 14:59:24 -07003314 * Return: 0 - function was executed
James Bottomley1fa44ec2006-02-23 12:43:43 -06003315 * 1 - function was scheduled for execution
3316 */
David Howells65f27f32006-11-22 14:55:48 +00003317int execute_in_process_context(work_func_t fn, struct execute_work *ew)
James Bottomley1fa44ec2006-02-23 12:43:43 -06003318{
3319 if (!in_interrupt()) {
David Howells65f27f32006-11-22 14:55:48 +00003320 fn(&ew->work);
James Bottomley1fa44ec2006-02-23 12:43:43 -06003321 return 0;
3322 }
3323
David Howells65f27f32006-11-22 14:55:48 +00003324 INIT_WORK(&ew->work, fn);
James Bottomley1fa44ec2006-02-23 12:43:43 -06003325 schedule_work(&ew->work);
3326
3327 return 1;
3328}
3329EXPORT_SYMBOL_GPL(execute_in_process_context);
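
/*
 * Example (illustrative only, not part of this file): a release path that
 * may be entered from interrupt context can use the caller-provided
 * execute_work storage to fall back to the system workqueue.  The my_dev
 * names are hypothetical.
 *
 *	struct my_dev {
 *		struct execute_work release_ew;
 *	};
 *
 *	static void my_dev_release(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(work, struct my_dev,
 *						  release_ew.work);
 *
 *		kfree(dev);
 *	}
 *
 *	static void my_dev_put_final(struct my_dev *dev)
 *	{
 *		execute_in_process_context(my_dev_release, &dev->release_ew);
 *	}
 */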
3330
Tejun Heo7a4e3442013-03-12 11:30:00 -07003331/**
3332 * free_workqueue_attrs - free a workqueue_attrs
3333 * @attrs: workqueue_attrs to free
3334 *
3335 * Undo alloc_workqueue_attrs().
3336 */
3337void free_workqueue_attrs(struct workqueue_attrs *attrs)
3338{
3339 if (attrs) {
3340 free_cpumask_var(attrs->cpumask);
3341 kfree(attrs);
3342 }
3343}
3344
3345/**
3346 * alloc_workqueue_attrs - allocate a workqueue_attrs
3347 * @gfp_mask: allocation mask to use
3348 *
3349 * Allocate a new workqueue_attrs, initialize with default settings and
Yacine Belkadid185af32013-07-31 14:59:24 -07003350 * return it.
3351 *
3352 * Return: The allocated new workqueue_attr on success. %NULL on failure.
Tejun Heo7a4e3442013-03-12 11:30:00 -07003353 */
3354struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3355{
3356 struct workqueue_attrs *attrs;
3357
3358 attrs = kzalloc(sizeof(*attrs), gfp_mask);
3359 if (!attrs)
3360 goto fail;
3361 if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3362 goto fail;
3363
Tejun Heo13e2e552013-04-01 11:23:31 -07003364 cpumask_copy(attrs->cpumask, cpu_possible_mask);
Tejun Heo7a4e3442013-03-12 11:30:00 -07003365 return attrs;
3366fail:
3367 free_workqueue_attrs(attrs);
3368 return NULL;
3369}
3370
Tejun Heo29c91e92013-03-12 11:30:03 -07003371static void copy_workqueue_attrs(struct workqueue_attrs *to,
3372 const struct workqueue_attrs *from)
3373{
3374 to->nice = from->nice;
3375 cpumask_copy(to->cpumask, from->cpumask);
Shaohua Li2865a8f2013-08-01 09:56:36 +08003376 /*
3377 * Unlike hash and equality test, this function doesn't ignore
3378 * ->no_numa as it is used for both pool and wq attrs. Instead,
3379 * get_unbound_pool() explicitly clears ->no_numa after copying.
3380 */
3381 to->no_numa = from->no_numa;
Tejun Heo29c91e92013-03-12 11:30:03 -07003382}
3383
Tejun Heo29c91e92013-03-12 11:30:03 -07003384/* hash value of the content of @attr */
3385static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3386{
3387 u32 hash = 0;
3388
3389 hash = jhash_1word(attrs->nice, hash);
Tejun Heo13e2e552013-04-01 11:23:31 -07003390 hash = jhash(cpumask_bits(attrs->cpumask),
3391 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
Tejun Heo29c91e92013-03-12 11:30:03 -07003392 return hash;
3393}
3394
3395/* content equality test */
3396static bool wqattrs_equal(const struct workqueue_attrs *a,
3397 const struct workqueue_attrs *b)
3398{
3399 if (a->nice != b->nice)
3400 return false;
3401 if (!cpumask_equal(a->cpumask, b->cpumask))
3402 return false;
3403 return true;
3404}
3405
Tejun Heo7a4e3442013-03-12 11:30:00 -07003406/**
3407 * init_worker_pool - initialize a newly zalloc'd worker_pool
3408 * @pool: worker_pool to initialize
3409 *
Shailendra Verma402dd892015-05-23 10:38:14 +05303410 * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
Yacine Belkadid185af32013-07-31 14:59:24 -07003411 *
3412 * Return: 0 on success, -errno on failure. Even on failure, all fields
Tejun Heo29c91e92013-03-12 11:30:03 -07003413 * inside @pool proper are initialized and put_unbound_pool() can be called
3414 * on @pool safely to release it.
Tejun Heo7a4e3442013-03-12 11:30:00 -07003415 */
3416static int init_worker_pool(struct worker_pool *pool)
Tejun Heo4e1a1f92013-03-12 11:30:00 -07003417{
3418 spin_lock_init(&pool->lock);
Tejun Heo29c91e92013-03-12 11:30:03 -07003419 pool->id = -1;
3420 pool->cpu = -1;
Tejun Heof3f90ad2013-04-01 11:23:34 -07003421 pool->node = NUMA_NO_NODE;
Tejun Heo4e1a1f92013-03-12 11:30:00 -07003422 pool->flags |= POOL_DISASSOCIATED;
Tejun Heo82607adc2015-12-08 11:28:04 -05003423 pool->watchdog_ts = jiffies;
Tejun Heo4e1a1f92013-03-12 11:30:00 -07003424 INIT_LIST_HEAD(&pool->worklist);
3425 INIT_LIST_HEAD(&pool->idle_list);
3426 hash_init(pool->busy_hash);
3427
Kees Cook32a6c722017-10-16 15:58:25 -07003428 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
Tejun Heo4e1a1f92013-03-12 11:30:00 -07003429
Kees Cook32a6c722017-10-16 15:58:25 -07003430 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
Tejun Heo4e1a1f92013-03-12 11:30:00 -07003431
Lai Jiangshanda028462014-05-20 17:46:31 +08003432 INIT_LIST_HEAD(&pool->workers);
Tejun Heo7a4e3442013-03-12 11:30:00 -07003433
Lai Jiangshan7cda9aa2014-05-20 17:46:32 +08003434 ida_init(&pool->worker_ida);
Tejun Heo29c91e92013-03-12 11:30:03 -07003435 INIT_HLIST_NODE(&pool->hash_node);
3436 pool->refcnt = 1;
3437
3438 /* shouldn't fail above this point */
Tejun Heo7a4e3442013-03-12 11:30:00 -07003439 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3440 if (!pool->attrs)
3441 return -ENOMEM;
3442 return 0;
Tejun Heo4e1a1f92013-03-12 11:30:00 -07003443}
3444
Bart Van Assche669de8b2019-02-14 15:00:54 -08003445#ifdef CONFIG_LOCKDEP
3446static void wq_init_lockdep(struct workqueue_struct *wq)
3447{
3448 char *lock_name;
3449
3450 lockdep_register_key(&wq->key);
3451 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3452 if (!lock_name)
3453 lock_name = wq->name;
Qian Cai69a106c2019-03-06 19:27:31 -05003454
3455 wq->lock_name = lock_name;
Bart Van Assche669de8b2019-02-14 15:00:54 -08003456 lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3457}
3458
3459static void wq_unregister_lockdep(struct workqueue_struct *wq)
3460{
3461 lockdep_unregister_key(&wq->key);
3462}
3463
3464static void wq_free_lockdep(struct workqueue_struct *wq)
3465{
3466 if (wq->lock_name != wq->name)
3467 kfree(wq->lock_name);
3468}
3469#else
3470static void wq_init_lockdep(struct workqueue_struct *wq)
3471{
3472}
3473
3474static void wq_unregister_lockdep(struct workqueue_struct *wq)
3475{
3476}
3477
3478static void wq_free_lockdep(struct workqueue_struct *wq)
3479{
3480}
3481#endif
3482
Tejun Heoe2dca7a2015-03-09 09:22:28 -04003483static void rcu_free_wq(struct rcu_head *rcu)
3484{
3485 struct workqueue_struct *wq =
3486 container_of(rcu, struct workqueue_struct, rcu);
3487
Bart Van Assche669de8b2019-02-14 15:00:54 -08003488 wq_free_lockdep(wq);
3489
Tejun Heoe2dca7a2015-03-09 09:22:28 -04003490 if (!(wq->flags & WQ_UNBOUND))
3491 free_percpu(wq->cpu_pwqs);
3492 else
3493 free_workqueue_attrs(wq->unbound_attrs);
3494
3495 kfree(wq->rescuer);
3496 kfree(wq);
3497}
3498
Tejun Heo29c91e92013-03-12 11:30:03 -07003499static void rcu_free_pool(struct rcu_head *rcu)
3500{
3501 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3502
Lai Jiangshan7cda9aa2014-05-20 17:46:32 +08003503 ida_destroy(&pool->worker_ida);
Tejun Heo29c91e92013-03-12 11:30:03 -07003504 free_workqueue_attrs(pool->attrs);
3505 kfree(pool);
3506}
3507
3508/**
3509 * put_unbound_pool - put a worker_pool
3510 * @pool: worker_pool to put
3511 *
Thomas Gleixner24acfb72019-03-13 17:55:47 +01003512 * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
Tejun Heoc5aa87b2013-03-13 16:51:36 -07003513 * safe manner. get_unbound_pool() calls this function on its failure path
3514 * and this function should be able to release pools which went through,
3515 * successfully or not, init_worker_pool().
Tejun Heoa892cac2013-04-01 11:23:32 -07003516 *
3517 * Should be called with wq_pool_mutex held.
Tejun Heo29c91e92013-03-12 11:30:03 -07003518 */
3519static void put_unbound_pool(struct worker_pool *pool)
3520{
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08003521 DECLARE_COMPLETION_ONSTACK(detach_completion);
Tejun Heo29c91e92013-03-12 11:30:03 -07003522 struct worker *worker;
3523
Tejun Heoa892cac2013-04-01 11:23:32 -07003524 lockdep_assert_held(&wq_pool_mutex);
3525
3526 if (--pool->refcnt)
Tejun Heo29c91e92013-03-12 11:30:03 -07003527 return;
Tejun Heo29c91e92013-03-12 11:30:03 -07003528
3529 /* sanity checks */
Lai Jiangshan61d0fbb2014-06-03 15:31:45 +08003530 if (WARN_ON(!(pool->cpu < 0)) ||
Tejun Heoa892cac2013-04-01 11:23:32 -07003531 WARN_ON(!list_empty(&pool->worklist)))
Tejun Heo29c91e92013-03-12 11:30:03 -07003532 return;
Tejun Heo29c91e92013-03-12 11:30:03 -07003533
3534 /* release id and unhash */
3535 if (pool->id >= 0)
3536 idr_remove(&worker_pool_idr, pool->id);
3537 hash_del(&pool->hash_node);
3538
Tejun Heoc5aa87b2013-03-13 16:51:36 -07003539 /*
Tejun Heo692b4822017-10-09 08:04:13 -07003540 * Become the manager and destroy all workers. This prevents
3541 * @pool's workers from blocking on attach_mutex. We're the last
3542 * manager and @pool gets freed with the flag set.
Tejun Heoc5aa87b2013-03-13 16:51:36 -07003543 */
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08003544 spin_lock_irq(&pool->lock);
Tejun Heo692b4822017-10-09 08:04:13 -07003545 wait_event_lock_irq(wq_manager_wait,
3546 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3547 pool->flags |= POOL_MANAGER_ACTIVE;
3548
Lai Jiangshan1037de32014-05-22 16:44:07 +08003549 while ((worker = first_idle_worker(pool)))
Tejun Heo29c91e92013-03-12 11:30:03 -07003550 destroy_worker(worker);
3551 WARN_ON(pool->nr_workers || pool->nr_idle);
Tejun Heo29c91e92013-03-12 11:30:03 -07003552 spin_unlock_irq(&pool->lock);
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08003553
Tejun Heo1258fae2018-05-18 08:47:13 -07003554 mutex_lock(&wq_pool_attach_mutex);
Lai Jiangshanda028462014-05-20 17:46:31 +08003555 if (!list_empty(&pool->workers))
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08003556 pool->detach_completion = &detach_completion;
Tejun Heo1258fae2018-05-18 08:47:13 -07003557 mutex_unlock(&wq_pool_attach_mutex);
Lai Jiangshan60f5a4b2014-05-20 17:46:29 +08003558
3559 if (pool->detach_completion)
3560 wait_for_completion(pool->detach_completion);
3561
Tejun Heo29c91e92013-03-12 11:30:03 -07003562 /* shut down the timers */
3563 del_timer_sync(&pool->idle_timer);
3564 del_timer_sync(&pool->mayday_timer);
3565
Thomas Gleixner24acfb72019-03-13 17:55:47 +01003566 /* RCU protected to allow dereferences from get_work_pool() */
Paul E. McKenney25b00772018-11-06 19:18:45 -08003567 call_rcu(&pool->rcu, rcu_free_pool);
Tejun Heo29c91e92013-03-12 11:30:03 -07003568}
3569
3570/**
3571 * get_unbound_pool - get a worker_pool with the specified attributes
3572 * @attrs: the attributes of the worker_pool to get
3573 *
3574 * Obtain a worker_pool which has the same attributes as @attrs, bump the
3575 * reference count and return it. If there already is a matching
3576 * worker_pool, it will be used; otherwise, this function attempts to
Yacine Belkadid185af32013-07-31 14:59:24 -07003577 * create a new one.
Tejun Heoa892cac2013-04-01 11:23:32 -07003578 *
3579 * Should be called with wq_pool_mutex held.
Yacine Belkadid185af32013-07-31 14:59:24 -07003580 *
3581 * Return: On success, a worker_pool with the same attributes as @attrs.
3582 * On failure, %NULL.
Tejun Heo29c91e92013-03-12 11:30:03 -07003583 */
3584static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3585{
Tejun Heo29c91e92013-03-12 11:30:03 -07003586 u32 hash = wqattrs_hash(attrs);
3587 struct worker_pool *pool;
Tejun Heof3f90ad2013-04-01 11:23:34 -07003588 int node;
Xunlei Pange22735842015-10-09 11:53:12 +08003589 int target_node = NUMA_NO_NODE;
Tejun Heo29c91e92013-03-12 11:30:03 -07003590
Tejun Heoa892cac2013-04-01 11:23:32 -07003591 lockdep_assert_held(&wq_pool_mutex);
Tejun Heo29c91e92013-03-12 11:30:03 -07003592
3593 /* do we already have a matching pool? */
Tejun Heo29c91e92013-03-12 11:30:03 -07003594 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3595 if (wqattrs_equal(pool->attrs, attrs)) {
3596 pool->refcnt++;
Lai Jiangshan3fb18232014-07-22 13:04:49 +08003597 return pool;
Tejun Heo29c91e92013-03-12 11:30:03 -07003598 }
3599 }
Tejun Heo29c91e92013-03-12 11:30:03 -07003600
Xunlei Pange22735842015-10-09 11:53:12 +08003601 /* if cpumask is contained inside a NUMA node, we belong to that node */
3602 if (wq_numa_enabled) {
3603 for_each_node(node) {
3604 if (cpumask_subset(attrs->cpumask,
3605 wq_numa_possible_cpumask[node])) {
3606 target_node = node;
3607 break;
3608 }
3609 }
3610 }
3611
Tejun Heo29c91e92013-03-12 11:30:03 -07003612 /* nope, create a new one */
Xunlei Pange22735842015-10-09 11:53:12 +08003613 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
Tejun Heo29c91e92013-03-12 11:30:03 -07003614 if (!pool || init_worker_pool(pool) < 0)
3615 goto fail;
3616
Tejun Heo8864b4e2013-03-12 11:30:04 -07003617 lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
Tejun Heo29c91e92013-03-12 11:30:03 -07003618 copy_workqueue_attrs(pool->attrs, attrs);
Xunlei Pange22735842015-10-09 11:53:12 +08003619 pool->node = target_node;
Tejun Heo29c91e92013-03-12 11:30:03 -07003620
Shaohua Li2865a8f2013-08-01 09:56:36 +08003621 /*
3622 * no_numa isn't a worker_pool attribute, always clear it. See
3623 * 'struct workqueue_attrs' comments for detail.
3624 */
3625 pool->attrs->no_numa = false;
3626
Tejun Heo29c91e92013-03-12 11:30:03 -07003627 if (worker_pool_assign_id(pool) < 0)
3628 goto fail;
3629
3630 /* create and start the initial worker */
Tejun Heo3347fa02016-09-16 15:49:32 -04003631 if (wq_online && !create_worker(pool))
Tejun Heo29c91e92013-03-12 11:30:03 -07003632 goto fail;
3633
Tejun Heo29c91e92013-03-12 11:30:03 -07003634 /* install */
Tejun Heo29c91e92013-03-12 11:30:03 -07003635 hash_add(unbound_pool_hash, &pool->hash_node, hash);
Lai Jiangshan3fb18232014-07-22 13:04:49 +08003636
Tejun Heo29c91e92013-03-12 11:30:03 -07003637 return pool;
3638fail:
Tejun Heo29c91e92013-03-12 11:30:03 -07003639 if (pool)
3640 put_unbound_pool(pool);
3641 return NULL;
3642}
3643
Tejun Heo8864b4e2013-03-12 11:30:04 -07003644static void rcu_free_pwq(struct rcu_head *rcu)
3645{
3646 kmem_cache_free(pwq_cache,
3647 container_of(rcu, struct pool_workqueue, rcu));
3648}
3649
3650/*
3651 * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
3652 * and needs to be destroyed.
3653 */
3654static void pwq_unbound_release_workfn(struct work_struct *work)
3655{
3656 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3657 unbound_release_work);
3658 struct workqueue_struct *wq = pwq->wq;
3659 struct worker_pool *pool = pwq->pool;
Tejun Heobc0caf02013-04-01 11:23:31 -07003660 bool is_last;
Tejun Heo8864b4e2013-03-12 11:30:04 -07003661
3662 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3663 return;
3664
Lai Jiangshan3c25a552013-03-25 16:57:17 -07003665 mutex_lock(&wq->mutex);
Tejun Heo8864b4e2013-03-12 11:30:04 -07003666 list_del_rcu(&pwq->pwqs_node);
Tejun Heobc0caf02013-04-01 11:23:31 -07003667 is_last = list_empty(&wq->pwqs);
Lai Jiangshan3c25a552013-03-25 16:57:17 -07003668 mutex_unlock(&wq->mutex);
Tejun Heo8864b4e2013-03-12 11:30:04 -07003669
Tejun Heoa892cac2013-04-01 11:23:32 -07003670 mutex_lock(&wq_pool_mutex);
Tejun Heo8864b4e2013-03-12 11:30:04 -07003671 put_unbound_pool(pool);
Tejun Heoa892cac2013-04-01 11:23:32 -07003672 mutex_unlock(&wq_pool_mutex);
3673
Paul E. McKenney25b00772018-11-06 19:18:45 -08003674 call_rcu(&pwq->rcu, rcu_free_pwq);
Tejun Heo8864b4e2013-03-12 11:30:04 -07003675
3676 /*
3677 * If we're the last pwq going away, @wq is already dead and no one
Tejun Heoe2dca7a2015-03-09 09:22:28 -04003678 * is gonna access it anymore. Schedule RCU free.
Tejun Heo8864b4e2013-03-12 11:30:04 -07003679 */
Bart Van Assche669de8b2019-02-14 15:00:54 -08003680 if (is_last) {
3681 wq_unregister_lockdep(wq);
Paul E. McKenney25b00772018-11-06 19:18:45 -08003682 call_rcu(&wq->rcu, rcu_free_wq);
Bart Van Assche669de8b2019-02-14 15:00:54 -08003683 }
Tejun Heo8864b4e2013-03-12 11:30:04 -07003684}
3685
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003686/**
Tejun Heo699ce092013-03-13 16:51:35 -07003687 * pwq_adjust_max_active - update a pwq's max_active to the current setting
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003688 * @pwq: target pool_workqueue
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003689 *
Tejun Heo699ce092013-03-13 16:51:35 -07003690 * If @pwq isn't freezing, set @pwq->max_active to the associated
3691 * workqueue's saved_max_active and activate delayed work items
3692 * accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003693 */
Tejun Heo699ce092013-03-13 16:51:35 -07003694static void pwq_adjust_max_active(struct pool_workqueue *pwq)
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003695{
Tejun Heo699ce092013-03-13 16:51:35 -07003696 struct workqueue_struct *wq = pwq->wq;
3697 bool freezable = wq->flags & WQ_FREEZABLE;
Tejun Heo3347fa02016-09-16 15:49:32 -04003698 unsigned long flags;
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003699
Tejun Heo699ce092013-03-13 16:51:35 -07003700 /* for @wq->saved_max_active */
Lai Jiangshana357fc02013-03-25 16:57:19 -07003701 lockdep_assert_held(&wq->mutex);
Tejun Heo699ce092013-03-13 16:51:35 -07003702
3703 /* fast exit for non-freezable wqs */
3704 if (!freezable && pwq->max_active == wq->saved_max_active)
3705 return;
3706
Tejun Heo3347fa02016-09-16 15:49:32 -04003707 /* this function can be called during early boot w/ irq disabled */
3708 spin_lock_irqsave(&pwq->pool->lock, flags);
Tejun Heo699ce092013-03-13 16:51:35 -07003709
Lai Jiangshan74b414e2014-05-22 19:01:16 +08003710 /*
3711 * During [un]freezing, the caller is responsible for ensuring that
3712 * this function is called at least once after @workqueue_freezing
3713 * is updated and visible.
3714 */
3715 if (!freezable || !workqueue_freezing) {
Tejun Heo699ce092013-03-13 16:51:35 -07003716 pwq->max_active = wq->saved_max_active;
3717
3718 while (!list_empty(&pwq->delayed_works) &&
3719 pwq->nr_active < pwq->max_active)
3720 pwq_activate_first_delayed(pwq);
Lai Jiangshan951a0782013-03-20 10:52:30 -07003721
3722 /*
3723 * Need to kick a worker after thawed or an unbound wq's
3724 * max_active is bumped. It's a slow path. Do it always.
3725 */
3726 wake_up_worker(pwq->pool);
Tejun Heo699ce092013-03-13 16:51:35 -07003727 } else {
3728 pwq->max_active = 0;
3729 }
3730
Tejun Heo3347fa02016-09-16 15:49:32 -04003731 spin_unlock_irqrestore(&pwq->pool->lock, flags);
Tejun Heo0fbd95a2013-03-13 16:51:35 -07003732}
3733
Tejun Heoe50aba92013-04-01 11:23:35 -07003734/* initialize newly alloced @pwq which is associated with @wq and @pool */
Tejun Heof147f292013-04-01 11:23:35 -07003735static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3736 struct worker_pool *pool)
Tejun Heod2c1d402013-03-12 11:30:04 -07003737{
3738 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3739
Tejun Heoe50aba92013-04-01 11:23:35 -07003740 memset(pwq, 0, sizeof(*pwq));
3741
Tejun Heod2c1d402013-03-12 11:30:04 -07003742 pwq->pool = pool;
3743 pwq->wq = wq;
3744 pwq->flush_color = -1;
Tejun Heo8864b4e2013-03-12 11:30:04 -07003745 pwq->refcnt = 1;
Tejun Heod2c1d402013-03-12 11:30:04 -07003746 INIT_LIST_HEAD(&pwq->delayed_works);
Tejun Heo1befcf32013-04-01 11:23:35 -07003747 INIT_LIST_HEAD(&pwq->pwqs_node);
Tejun Heod2c1d402013-03-12 11:30:04 -07003748 INIT_LIST_HEAD(&pwq->mayday_node);
Tejun Heo8864b4e2013-03-12 11:30:04 -07003749 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
Tejun Heof147f292013-04-01 11:23:35 -07003750}
Tejun Heod2c1d402013-03-12 11:30:04 -07003751
Tejun Heof147f292013-04-01 11:23:35 -07003752/* sync @pwq with the current state of its associated wq and link it */
Tejun Heo1befcf32013-04-01 11:23:35 -07003753static void link_pwq(struct pool_workqueue *pwq)
Tejun Heof147f292013-04-01 11:23:35 -07003754{
3755 struct workqueue_struct *wq = pwq->wq;
3756
3757 lockdep_assert_held(&wq->mutex);
Tejun Heo75ccf592013-03-12 11:30:04 -07003758
Tejun Heo1befcf32013-04-01 11:23:35 -07003759 /* may be called multiple times, ignore if already linked */
3760 if (!list_empty(&pwq->pwqs_node))
3761 return;
3762
Lai Jiangshan29b1cb42014-07-22 13:04:27 +08003763 /* set the matching work_color */
Tejun Heo75ccf592013-03-12 11:30:04 -07003764 pwq->work_color = wq->work_color;
Tejun Heo983ca252013-03-13 16:51:35 -07003765
3766 /* sync max_active to the current setting */
3767 pwq_adjust_max_active(pwq);
3768
3769 /* link in @pwq */
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07003770 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
Tejun Heof147f292013-04-01 11:23:35 -07003771}
Lai Jiangshana357fc02013-03-25 16:57:19 -07003772
Tejun Heof147f292013-04-01 11:23:35 -07003773/* obtain a pool matching @attr and create a pwq associating the pool and @wq */
3774static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3775 const struct workqueue_attrs *attrs)
3776{
3777 struct worker_pool *pool;
3778 struct pool_workqueue *pwq;
3779
3780 lockdep_assert_held(&wq_pool_mutex);
3781
3782 pool = get_unbound_pool(attrs);
3783 if (!pool)
3784 return NULL;
3785
Tejun Heoe50aba92013-04-01 11:23:35 -07003786 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
Tejun Heof147f292013-04-01 11:23:35 -07003787 if (!pwq) {
3788 put_unbound_pool(pool);
3789 return NULL;
Tejun Heodf2d5ae2013-04-01 11:23:35 -07003790 }
Tejun Heo6029a912013-04-01 11:23:34 -07003791
Tejun Heof147f292013-04-01 11:23:35 -07003792 init_pwq(pwq, wq, pool);
3793 return pwq;
Tejun Heod2c1d402013-03-12 11:30:04 -07003794}
3795
Tejun Heo4c16bd32013-04-01 11:23:36 -07003796/**
Gong Zhaogang30186c62015-05-11 11:02:47 -04003797 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003798 * @attrs: the wq_attrs of the default pwq of the target workqueue
Tejun Heo4c16bd32013-04-01 11:23:36 -07003799 * @node: the target NUMA node
3800 * @cpu_going_down: if >= 0, the CPU to consider as offline
3801 * @cpumask: outarg, the resulting cpumask
3802 *
3803 * Calculate the cpumask a workqueue with @attrs should use on @node. If
3804 * @cpu_going_down is >= 0, that cpu is considered offline during
Yacine Belkadid185af32013-07-31 14:59:24 -07003805 * calculation. The result is stored in @cpumask.
Tejun Heo4c16bd32013-04-01 11:23:36 -07003806 *
3807 * If NUMA affinity is not enabled, @attrs->cpumask is always used. If
3808 * enabled and @node has online CPUs requested by @attrs, the returned
3809 * cpumask is the intersection of the possible CPUs of @node and
3810 * @attrs->cpumask.
3811 *
3812 * The caller is responsible for ensuring that the cpumask of @node stays
3813 * stable.
Yacine Belkadid185af32013-07-31 14:59:24 -07003814 *
3815 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
3816 * %false if equal.
Tejun Heo4c16bd32013-04-01 11:23:36 -07003817 */
3818static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3819 int cpu_going_down, cpumask_t *cpumask)
3820{
Tejun Heod55262c2013-04-01 11:23:38 -07003821 if (!wq_numa_enabled || attrs->no_numa)
Tejun Heo4c16bd32013-04-01 11:23:36 -07003822 goto use_dfl;
3823
3824 /* does @node have any online CPUs @attrs wants? */
3825 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3826 if (cpu_going_down >= 0)
3827 cpumask_clear_cpu(cpu_going_down, cpumask);
3828
3829 if (cpumask_empty(cpumask))
3830 goto use_dfl;
3831
3832 /* yeap, return possible CPUs in @node that @attrs wants */
3833 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
Michael Bringmann1ad0f0a2017-07-27 16:27:14 -05003834
3835 if (cpumask_empty(cpumask)) {
3836 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3837 "possible intersect\n");
3838 return false;
3839 }
3840
Tejun Heo4c16bd32013-04-01 11:23:36 -07003841 return !cpumask_equal(cpumask, attrs->cpumask);
3842
3843use_dfl:
3844 cpumask_copy(cpumask, attrs->cpumask);
3845 return false;
3846}
3847
Tejun Heo1befcf32013-04-01 11:23:35 -07003848/* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
3849static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3850 int node,
3851 struct pool_workqueue *pwq)
3852{
3853 struct pool_workqueue *old_pwq;
3854
Lai Jiangshan5b95e1a2015-05-12 20:32:29 +08003855 lockdep_assert_held(&wq_pool_mutex);
Tejun Heo1befcf32013-04-01 11:23:35 -07003856 lockdep_assert_held(&wq->mutex);
3857
3858 /* link_pwq() can handle duplicate calls */
3859 link_pwq(pwq);
3860
3861 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3862 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3863 return old_pwq;
3864}
3865
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003866/* context to store the prepared attrs & pwqs before applying */
3867struct apply_wqattrs_ctx {
3868 struct workqueue_struct *wq; /* target workqueue */
3869 struct workqueue_attrs *attrs; /* attrs to apply */
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003870 struct list_head list; /* queued for batching commit */
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003871 struct pool_workqueue *dfl_pwq;
3872 struct pool_workqueue *pwq_tbl[];
3873};
3874
3875/* free the resources after success or abort */
3876static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3877{
3878 if (ctx) {
3879 int node;
3880
3881 for_each_node(node)
3882 put_pwq_unlocked(ctx->pwq_tbl[node]);
3883 put_pwq_unlocked(ctx->dfl_pwq);
3884
3885 free_workqueue_attrs(ctx->attrs);
3886
3887 kfree(ctx);
3888 }
3889}
3890
3891/* allocate the attrs and pwqs for later installation */
3892static struct apply_wqattrs_ctx *
3893apply_wqattrs_prepare(struct workqueue_struct *wq,
3894 const struct workqueue_attrs *attrs)
3895{
3896 struct apply_wqattrs_ctx *ctx;
3897 struct workqueue_attrs *new_attrs, *tmp_attrs;
3898 int node;
3899
3900 lockdep_assert_held(&wq_pool_mutex);
3901
Kees Cookacafe7e2018-05-08 13:45:50 -07003902 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003903
3904 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3905 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3906 if (!ctx || !new_attrs || !tmp_attrs)
3907 goto out_free;
3908
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003909 /*
3910 * Calculate the attrs of the default pwq.
3911 * If the user configured cpumask doesn't overlap with the
3912 * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
3913 */
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003914 copy_workqueue_attrs(new_attrs, attrs);
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08003915 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003916 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3917 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003918
3919 /*
3920 * We may create multiple pwqs with differing cpumasks. Make a
3921 * copy of @new_attrs which will be modified and used to obtain
3922 * pools.
3923 */
3924 copy_workqueue_attrs(tmp_attrs, new_attrs);
3925
3926 /*
3927 * If something goes wrong during CPU up/down, we'll fall back to
3928 * the default pwq covering whole @attrs->cpumask. Always create
3929 * it even if we don't use it immediately.
3930 */
3931 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3932 if (!ctx->dfl_pwq)
3933 goto out_free;
3934
3935 for_each_node(node) {
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003936 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003937 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3938 if (!ctx->pwq_tbl[node])
3939 goto out_free;
3940 } else {
3941 ctx->dfl_pwq->refcnt++;
3942 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3943 }
3944 }
3945
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003946 /* save the user configured attrs and sanitize it. */
3947 copy_workqueue_attrs(new_attrs, attrs);
3948 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003949 ctx->attrs = new_attrs;
Lai Jiangshan042f7df12015-04-30 17:16:12 +08003950
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08003951 ctx->wq = wq;
3952 free_workqueue_attrs(tmp_attrs);
3953 return ctx;
3954
3955out_free:
3956 free_workqueue_attrs(tmp_attrs);
3957 free_workqueue_attrs(new_attrs);
3958 apply_wqattrs_cleanup(ctx);
3959 return NULL;
3960}
3961
3962/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
3963static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3964{
3965 int node;
3966
3967 /* all pwqs have been created successfully, let's install'em */
3968 mutex_lock(&ctx->wq->mutex);
3969
3970 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
3971
3972 /* save the previous pwq and install the new one */
3973 for_each_node(node)
3974 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3975 ctx->pwq_tbl[node]);
3976
3977 /* @dfl_pwq might not have been used, ensure it's linked */
3978 link_pwq(ctx->dfl_pwq);
3979 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3980
3981 mutex_unlock(&ctx->wq->mutex);
3982}
3983
Lai Jiangshana0111cf2015-05-19 18:03:47 +08003984static void apply_wqattrs_lock(void)
3985{
3986 /* CPUs should stay stable across pwq creations and installations */
3987 get_online_cpus();
3988 mutex_lock(&wq_pool_mutex);
3989}
3990
3991static void apply_wqattrs_unlock(void)
3992{
3993 mutex_unlock(&wq_pool_mutex);
3994 put_online_cpus();
3995}
3996
3997static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3998 const struct workqueue_attrs *attrs)
3999{
4000 struct apply_wqattrs_ctx *ctx;
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004001
4002 /* only unbound workqueues can change attributes */
4003 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
4004 return -EINVAL;
4005
4006 /* creating multiple pwqs breaks ordering guarantee */
Tejun Heo0a94efb2017-07-23 08:36:15 -04004007 if (!list_empty(&wq->pwqs)) {
4008 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4009 return -EINVAL;
4010
4011 wq->flags &= ~__WQ_ORDERED;
4012 }
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004013
4014 ctx = apply_wqattrs_prepare(wq, attrs);
wanghaibin62011712016-01-07 20:38:59 +08004015 if (!ctx)
4016 return -ENOMEM;
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004017
4018 /* the ctx has been prepared successfully, let's commit it */
wanghaibin62011712016-01-07 20:38:59 +08004019 apply_wqattrs_commit(ctx);
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004020 apply_wqattrs_cleanup(ctx);
4021
wanghaibin62011712016-01-07 20:38:59 +08004022 return 0;
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004023}
4024
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004025/**
4026 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
4027 * @wq: the target workqueue
4028 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
4029 *
Tejun Heo4c16bd32013-04-01 11:23:36 -07004030 * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
4031 * machines, this function maps a separate pwq to each NUMA node with
 4032 * possible CPUs in @attrs->cpumask so that work items are affine to the
4033 * NUMA node it was issued on. Older pwqs are released as in-flight work
4034 * items finish. Note that a work item which repeatedly requeues itself
4035 * back-to-back will stay on its current pwq.
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004036 *
Yacine Belkadid185af32013-07-31 14:59:24 -07004037 * Performs GFP_KERNEL allocations.
4038 *
4039 * Return: 0 on success and -errno on failure.
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004040 */
4041int apply_workqueue_attrs(struct workqueue_struct *wq,
4042 const struct workqueue_attrs *attrs)
4043{
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004044 int ret;
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004045
Lai Jiangshana0111cf2015-05-19 18:03:47 +08004046 apply_wqattrs_lock();
4047 ret = apply_workqueue_attrs_locked(wq, attrs);
4048 apply_wqattrs_unlock();
Lai Jiangshan2d5f0762015-04-27 17:58:38 +08004049
Tejun Heo48621252013-04-01 11:23:31 -07004050 return ret;
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004051}
NeilBrown6106c0f2018-01-11 15:06:40 +11004052EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
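
/*
 * Example (illustrative only, not part of this file): lowering the nice
 * value and restricting the CPUs of an already created unbound workqueue.
 * my_unbound_wq and the chosen node are hypothetical; the attrs are
 * copied by apply_workqueue_attrs(), so they can be freed right after.
 *
 *	static int my_tune_wq(struct workqueue_struct *my_unbound_wq)
 *	{
 *		struct workqueue_attrs *attrs;
 *		int ret;
 *
 *		attrs = alloc_workqueue_attrs(GFP_KERNEL);
 *		if (!attrs)
 *			return -ENOMEM;
 *
 *		attrs->nice = -5;
 *		cpumask_copy(attrs->cpumask, cpumask_of_node(0));
 *		ret = apply_workqueue_attrs(my_unbound_wq, attrs);
 *		free_workqueue_attrs(attrs);
 *		return ret;
 *	}
 */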
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004053
Tejun Heo4c16bd32013-04-01 11:23:36 -07004054/**
4055 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
4056 * @wq: the target workqueue
4057 * @cpu: the CPU coming up or going down
4058 * @online: whether @cpu is coming up or going down
4059 *
4060 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
4061 * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update NUMA affinity of
4062 * @wq accordingly.
4063 *
4064 * If NUMA affinity can't be adjusted due to memory allocation failure, it
4065 * falls back to @wq->dfl_pwq which may not be optimal but is always
4066 * correct.
4067 *
4068 * Note that when the last allowed CPU of a NUMA node goes offline for a
4069 * workqueue with a cpumask spanning multiple nodes, the workers which were
4070 * already executing the work items for the workqueue will lose their CPU
4071 * affinity and may execute on any CPU. This is similar to how per-cpu
4072 * workqueues behave on CPU_DOWN. If a workqueue user wants strict
4073 * affinity, it's the user's responsibility to flush the work item from
4074 * CPU_DOWN_PREPARE.
4075 */
4076static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4077 bool online)
4078{
4079 int node = cpu_to_node(cpu);
4080 int cpu_off = online ? -1 : cpu;
4081 struct pool_workqueue *old_pwq = NULL, *pwq;
4082 struct workqueue_attrs *target_attrs;
4083 cpumask_t *cpumask;
4084
4085 lockdep_assert_held(&wq_pool_mutex);
4086
Lai Jiangshanf7142ed2015-05-12 20:32:30 +08004087 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
4088 wq->unbound_attrs->no_numa)
Tejun Heo4c16bd32013-04-01 11:23:36 -07004089 return;
4090
4091 /*
4092 * We don't wanna alloc/free wq_attrs for each wq for each CPU.
4093 * Let's use a preallocated one. The following buf is protected by
4094 * CPU hotplug exclusion.
4095 */
4096 target_attrs = wq_update_unbound_numa_attrs_buf;
4097 cpumask = target_attrs->cpumask;
4098
Tejun Heo4c16bd32013-04-01 11:23:36 -07004099 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
4100 pwq = unbound_pwq_by_node(wq, node);
4101
4102 /*
4103 * Let's determine what needs to be done. If the target cpumask is
Lai Jiangshan042f7df12015-04-30 17:16:12 +08004104 * different from the default pwq's, we need to compare it to @pwq's
4105 * and create a new one if they don't match. If the target cpumask
4106 * equals the default pwq's, the default pwq should be used.
Tejun Heo4c16bd32013-04-01 11:23:36 -07004107 */
Lai Jiangshan042f7df12015-04-30 17:16:12 +08004108 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
Tejun Heo4c16bd32013-04-01 11:23:36 -07004109 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
Lai Jiangshanf7142ed2015-05-12 20:32:30 +08004110 return;
Tejun Heo4c16bd32013-04-01 11:23:36 -07004111 } else {
Daeseok Youn534a3fb2014-04-18 09:08:14 +09004112 goto use_dfl_pwq;
Tejun Heo4c16bd32013-04-01 11:23:36 -07004113 }
4114
Tejun Heo4c16bd32013-04-01 11:23:36 -07004115 /* create a new pwq */
4116 pwq = alloc_unbound_pwq(wq, target_attrs);
4117 if (!pwq) {
Fabian Frederick2d916032014-05-12 13:59:35 -04004118 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4119 wq->name);
Daeseok Youn77f300b2014-04-16 14:32:29 +09004120 goto use_dfl_pwq;
Tejun Heo4c16bd32013-04-01 11:23:36 -07004121 }
4122
Lai Jiangshanf7142ed2015-05-12 20:32:30 +08004123 /* Install the new pwq. */
Tejun Heo4c16bd32013-04-01 11:23:36 -07004124 mutex_lock(&wq->mutex);
4125 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
4126 goto out_unlock;
4127
4128use_dfl_pwq:
Lai Jiangshanf7142ed2015-05-12 20:32:30 +08004129 mutex_lock(&wq->mutex);
Tejun Heo4c16bd32013-04-01 11:23:36 -07004130 spin_lock_irq(&wq->dfl_pwq->pool->lock);
4131 get_pwq(wq->dfl_pwq);
4132 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4133 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
4134out_unlock:
4135 mutex_unlock(&wq->mutex);
4136 put_pwq_unlocked(old_pwq);
4137}
4138
Tejun Heo30cdf2492013-03-12 11:29:57 -07004139static int alloc_and_link_pwqs(struct workqueue_struct *wq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004140{
Tejun Heo49e3cf42013-03-12 11:29:58 -07004141 bool highpri = wq->flags & WQ_HIGHPRI;
Tejun Heo8a2b7532013-09-05 12:30:04 -04004142 int cpu, ret;
Frederic Weisbeckere1d8aa92009-01-12 23:15:46 +01004143
Tejun Heo30cdf2492013-03-12 11:29:57 -07004144 if (!(wq->flags & WQ_UNBOUND)) {
Tejun Heo420c0dd2013-03-12 11:29:59 -07004145 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
4146 if (!wq->cpu_pwqs)
Tejun Heo30cdf2492013-03-12 11:29:57 -07004147 return -ENOMEM;
4148
4149 for_each_possible_cpu(cpu) {
Tejun Heo7fb98ea2013-03-12 11:30:00 -07004150 struct pool_workqueue *pwq =
4151 per_cpu_ptr(wq->cpu_pwqs, cpu);
Tejun Heo7a62c2c2013-03-12 11:30:03 -07004152 struct worker_pool *cpu_pools =
Tejun Heof02ae732013-03-12 11:30:03 -07004153 per_cpu(cpu_worker_pools, cpu);
Tejun Heo30cdf2492013-03-12 11:29:57 -07004154
Tejun Heof147f292013-04-01 11:23:35 -07004155 init_pwq(pwq, wq, &cpu_pools[highpri]);
4156
4157 mutex_lock(&wq->mutex);
Tejun Heo1befcf32013-04-01 11:23:35 -07004158 link_pwq(pwq);
Tejun Heof147f292013-04-01 11:23:35 -07004159 mutex_unlock(&wq->mutex);
Tejun Heo30cdf2492013-03-12 11:29:57 -07004160 }
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004161 return 0;
Tejun Heo8a2b7532013-09-05 12:30:04 -04004162 } else if (wq->flags & __WQ_ORDERED) {
4163 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
4164 /* there should only be single pwq for ordering guarantee */
4165 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
4166 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
4167 "ordering guarantee broken for workqueue %s\n", wq->name);
4168 return ret;
Tejun Heo30cdf2492013-03-12 11:29:57 -07004169 } else {
Tejun Heo9e8cd2f2013-03-12 11:30:04 -07004170 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
Tejun Heo30cdf2492013-03-12 11:29:57 -07004171 }
Oleg Nesterov3af244332007-05-09 02:34:09 -07004172}
4173
Tejun Heof3421792010-07-02 10:03:51 +02004174static int wq_clamp_max_active(int max_active, unsigned int flags,
4175 const char *name)
Tejun Heob71ab8c2010-06-29 10:07:14 +02004176{
Tejun Heof3421792010-07-02 10:03:51 +02004177 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4178
4179 if (max_active < 1 || max_active > lim)
Valentin Ilie044c7822012-08-19 00:52:42 +03004180 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4181 max_active, name, 1, lim);
Tejun Heob71ab8c2010-06-29 10:07:14 +02004182
Tejun Heof3421792010-07-02 10:03:51 +02004183 return clamp_val(max_active, 1, lim);
Tejun Heob71ab8c2010-06-29 10:07:14 +02004184}
4185
Tejun Heo983c7512018-01-08 05:38:32 -08004186/*
4187 * Workqueues which may be used during memory reclaim should have a rescuer
4188 * to guarantee forward progress.
4189 */
4190static int init_rescuer(struct workqueue_struct *wq)
4191{
4192 struct worker *rescuer;
4193 int ret;
4194
4195 if (!(wq->flags & WQ_MEM_RECLAIM))
4196 return 0;
4197
4198 rescuer = alloc_worker(NUMA_NO_NODE);
4199 if (!rescuer)
4200 return -ENOMEM;
4201
4202 rescuer->rescue_wq = wq;
4203 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4204 ret = PTR_ERR_OR_ZERO(rescuer->task);
4205 if (ret) {
4206 kfree(rescuer);
4207 return ret;
4208 }
4209
4210 wq->rescuer = rescuer;
4211 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4212 wake_up_process(rescuer->task);
4213
4214 return 0;
4215}
4216
Mathieu Malaterrea2775bb2019-03-12 21:21:26 +01004217__printf(1, 4)
Bart Van Assche669de8b2019-02-14 15:00:54 -08004218struct workqueue_struct *alloc_workqueue(const char *fmt,
4219 unsigned int flags,
4220 int max_active, ...)
Oleg Nesterov3af244332007-05-09 02:34:09 -07004221{
Tejun Heodf2d5ae2013-04-01 11:23:35 -07004222 size_t tbl_size = 0;
Tejun Heoecf68812013-04-01 11:23:34 -07004223 va_list args;
Oleg Nesterov3af244332007-05-09 02:34:09 -07004224 struct workqueue_struct *wq;
Tejun Heo49e3cf42013-03-12 11:29:58 -07004225 struct pool_workqueue *pwq;
Tejun Heob196be82012-01-10 15:11:35 -08004226
Tejun Heo5c0338c2017-07-18 18:41:52 -04004227 /*
4228 * Unbound && max_active == 1 used to imply ordered, which is no
4229 * longer the case on NUMA machines due to per-node pools. While
4230 * alloc_ordered_workqueue() is the right way to create an ordered
4231 * workqueue, keep the previous behavior to avoid subtle breakages
4232 * on NUMA.
4233 */
4234 if ((flags & WQ_UNBOUND) && max_active == 1)
4235 flags |= __WQ_ORDERED;
4236
Viresh Kumarcee22a12013-04-08 16:45:40 +05304237 /* see the comment above the definition of WQ_POWER_EFFICIENT */
4238 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4239 flags |= WQ_UNBOUND;
4240
Tejun Heoecf68812013-04-01 11:23:34 -07004241 /* allocate wq and format name */
Tejun Heodf2d5ae2013-04-01 11:23:35 -07004242 if (flags & WQ_UNBOUND)
Lai Jiangshanddcb57e2014-07-22 13:05:40 +08004243 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
Tejun Heodf2d5ae2013-04-01 11:23:35 -07004244
4245 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
Tejun Heob196be82012-01-10 15:11:35 -08004246 if (!wq)
Tejun Heod2c1d402013-03-12 11:30:04 -07004247 return NULL;
Tejun Heob196be82012-01-10 15:11:35 -08004248
Tejun Heo6029a912013-04-01 11:23:34 -07004249 if (flags & WQ_UNBOUND) {
4250 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4251 if (!wq->unbound_attrs)
4252 goto err_free_wq;
4253 }
4254
Bart Van Assche669de8b2019-02-14 15:00:54 -08004255 va_start(args, max_active);
Tejun Heoecf68812013-04-01 11:23:34 -07004256 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
Tejun Heob196be82012-01-10 15:11:35 -08004257 va_end(args);
Oleg Nesterov3af244332007-05-09 02:34:09 -07004258
Tejun Heod320c032010-06-29 10:07:14 +02004259 max_active = max_active ?: WQ_DFL_ACTIVE;
Tejun Heob196be82012-01-10 15:11:35 -08004260 max_active = wq_clamp_max_active(max_active, flags, wq->name);
Oleg Nesterov3af244332007-05-09 02:34:09 -07004261
Tejun Heob196be82012-01-10 15:11:35 -08004262 /* init wq */
Tejun Heo97e37d72010-06-29 10:07:10 +02004263 wq->flags = flags;
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004264 wq->saved_max_active = max_active;
Lai Jiangshan3c25a552013-03-25 16:57:17 -07004265 mutex_init(&wq->mutex);
Tejun Heo112202d2013-02-13 19:29:12 -08004266 atomic_set(&wq->nr_pwqs_to_flush, 0);
Tejun Heo30cdf2492013-03-12 11:29:57 -07004267 INIT_LIST_HEAD(&wq->pwqs);
Tejun Heo73f53c42010-06-29 10:07:11 +02004268 INIT_LIST_HEAD(&wq->flusher_queue);
4269 INIT_LIST_HEAD(&wq->flusher_overflow);
Tejun Heo493a1722013-03-12 11:29:59 -07004270 INIT_LIST_HEAD(&wq->maydays);
Oleg Nesterov3af244332007-05-09 02:34:09 -07004271
Bart Van Assche669de8b2019-02-14 15:00:54 -08004272 wq_init_lockdep(wq);
Oleg Nesterovcce1a162007-05-09 02:34:13 -07004273 INIT_LIST_HEAD(&wq->list);
Oleg Nesterov3af244332007-05-09 02:34:09 -07004274
Tejun Heo30cdf2492013-03-12 11:29:57 -07004275 if (alloc_and_link_pwqs(wq) < 0)
Tejun Heod2c1d402013-03-12 11:30:04 -07004276 goto err_free_wq;
Oleg Nesterov3af244332007-05-09 02:34:09 -07004277
Tejun Heo40c17f72018-01-08 05:38:37 -08004278 if (wq_online && init_rescuer(wq) < 0)
Tejun Heo983c7512018-01-08 05:38:32 -08004279 goto err_destroy;
Oleg Nesterov3af244332007-05-09 02:34:09 -07004280
Tejun Heo226223a2013-03-12 11:30:05 -07004281 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4282 goto err_destroy;
4283
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004284 /*
Lai Jiangshan68e13a62013-03-25 16:57:17 -07004285 * wq_pool_mutex protects global freeze state and workqueues list.
4286 * Grab it, adjust max_active and add the new @wq to workqueues
4287 * list.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004288 */
Lai Jiangshan68e13a62013-03-25 16:57:17 -07004289 mutex_lock(&wq_pool_mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004290
Lai Jiangshana357fc02013-03-25 16:57:19 -07004291 mutex_lock(&wq->mutex);
Tejun Heo699ce092013-03-13 16:51:35 -07004292 for_each_pwq(pwq, wq)
4293 pwq_adjust_max_active(pwq);
Lai Jiangshana357fc02013-03-25 16:57:19 -07004294 mutex_unlock(&wq->mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004295
Tejun Heoe2dca7a2015-03-09 09:22:28 -04004296 list_add_tail_rcu(&wq->list, &workqueues);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004297
Lai Jiangshan68e13a62013-03-25 16:57:17 -07004298 mutex_unlock(&wq_pool_mutex);
Tejun Heo15376632010-06-29 10:07:11 +02004299
Oleg Nesterov3af244332007-05-09 02:34:09 -07004300 return wq;
Tejun Heod2c1d402013-03-12 11:30:04 -07004301
4302err_free_wq:
Bart Van Assche009bb422019-03-03 14:00:46 -08004303 wq_unregister_lockdep(wq);
4304 wq_free_lockdep(wq);
Tejun Heo6029a912013-04-01 11:23:34 -07004305 free_workqueue_attrs(wq->unbound_attrs);
Tejun Heod2c1d402013-03-12 11:30:04 -07004306 kfree(wq);
4307 return NULL;
4308err_destroy:
4309 destroy_workqueue(wq);
Tejun Heo4690c4a2010-06-29 10:07:10 +02004310 return NULL;
Oleg Nesterov3af244332007-05-09 02:34:09 -07004311}
Bart Van Assche669de8b2019-02-14 15:00:54 -08004312EXPORT_SYMBOL_GPL(alloc_workqueue);
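
/*
 * Example (illustrative only, not part of this file): a driver creating
 * its own workqueue at init time.  The flags shown are one plausible
 * choice, not a recommendation; "my_driver" and the my_* names are
 * hypothetical.
 *
 *	static struct workqueue_struct *my_wq;
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_info("my_driver: work ran\n");
 *	}
 *
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	static int my_driver_init(void)
 *	{
 *		my_wq = alloc_workqueue("my_driver",
 *					WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 *		if (!my_wq)
 *			return -ENOMEM;
 *		queue_work(my_wq, &my_work);
 *		return 0;
 *	}
 */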
Oleg Nesterov3af244332007-05-09 02:34:09 -07004313
4314/**
4315 * destroy_workqueue - safely terminate a workqueue
4316 * @wq: target workqueue
4317 *
4318 * Safely destroy a workqueue. All work currently pending will be done first.
4319 */
4320void destroy_workqueue(struct workqueue_struct *wq)
4321{
Tejun Heo49e3cf42013-03-12 11:29:58 -07004322 struct pool_workqueue *pwq;
Tejun Heo4c16bd32013-04-01 11:23:36 -07004323 int node;
Oleg Nesterov3af244332007-05-09 02:34:09 -07004324
Tejun Heo9c5a2ba2011-04-05 18:01:44 +02004325 /* drain it before proceeding with destruction */
4326 drain_workqueue(wq);
Tejun Heoc8efcc22010-12-20 19:32:04 +01004327
Tejun Heo6183c002013-03-12 11:29:57 -07004328 /* sanity checks */
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07004329 mutex_lock(&wq->mutex);
Tejun Heo49e3cf42013-03-12 11:29:58 -07004330 for_each_pwq(pwq, wq) {
Tejun Heo6183c002013-03-12 11:29:57 -07004331 int i;
4332
Tejun Heo76af4d92013-03-12 11:30:00 -07004333 for (i = 0; i < WORK_NR_COLORS; i++) {
4334 if (WARN_ON(pwq->nr_in_flight[i])) {
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07004335 mutex_unlock(&wq->mutex);
Tejun Heofa07fb62016-09-05 08:54:06 -04004336 show_workqueue_state();
Tejun Heo6183c002013-03-12 11:29:57 -07004337 return;
Tejun Heo76af4d92013-03-12 11:30:00 -07004338 }
4339 }
4340
Lai Jiangshan5c529592013-04-04 10:05:38 +08004341 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
Tejun Heo8864b4e2013-03-12 11:30:04 -07004342 WARN_ON(pwq->nr_active) ||
Tejun Heo76af4d92013-03-12 11:30:00 -07004343 WARN_ON(!list_empty(&pwq->delayed_works))) {
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07004344 mutex_unlock(&wq->mutex);
Tejun Heofa07fb62016-09-05 08:54:06 -04004345 show_workqueue_state();
Tejun Heo6183c002013-03-12 11:29:57 -07004346 return;
Tejun Heo76af4d92013-03-12 11:30:00 -07004347 }
Tejun Heo6183c002013-03-12 11:29:57 -07004348 }
Lai Jiangshanb09f4fd2013-03-25 16:57:18 -07004349 mutex_unlock(&wq->mutex);
Tejun Heo6183c002013-03-12 11:29:57 -07004350
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02004351 /*
4352 * wq list is used to freeze wq, remove from list after
4353 * flushing is complete in case freeze races us.
4354 */
Lai Jiangshan68e13a62013-03-25 16:57:17 -07004355 mutex_lock(&wq_pool_mutex);
Tejun Heoe2dca7a2015-03-09 09:22:28 -04004356 list_del_rcu(&wq->list);
Lai Jiangshan68e13a62013-03-25 16:57:17 -07004357 mutex_unlock(&wq_pool_mutex);
Oleg Nesterov3af244332007-05-09 02:34:09 -07004358
Tejun Heo226223a2013-03-12 11:30:05 -07004359 workqueue_sysfs_unregister(wq);
4360
Tejun Heoe2dca7a2015-03-09 09:22:28 -04004361 if (wq->rescuer)
Tejun Heoe22bee72010-06-29 10:07:14 +02004362 kthread_stop(wq->rescuer->task);
Tejun Heoe22bee72010-06-29 10:07:14 +02004363
Tejun Heo8864b4e2013-03-12 11:30:04 -07004364 if (!(wq->flags & WQ_UNBOUND)) {
Bart Van Assche669de8b2019-02-14 15:00:54 -08004365 wq_unregister_lockdep(wq);
Tejun Heo8864b4e2013-03-12 11:30:04 -07004366 /*
4367 * The base ref is never dropped on per-cpu pwqs. Directly
Tejun Heoe2dca7a2015-03-09 09:22:28 -04004368 * schedule RCU free.
Tejun Heo8864b4e2013-03-12 11:30:04 -07004369 */
Paul E. McKenney25b00772018-11-06 19:18:45 -08004370 call_rcu(&wq->rcu, rcu_free_wq);
Tejun Heo8864b4e2013-03-12 11:30:04 -07004371 } else {
4372 /*
4373 * We're the sole accessor of @wq at this point. Directly
Tejun Heo4c16bd32013-04-01 11:23:36 -07004374 * access numa_pwq_tbl[] and dfl_pwq to put the base refs.
4375 * @wq will be freed when the last pwq is released.
Tejun Heo8864b4e2013-03-12 11:30:04 -07004376 */
Tejun Heo4c16bd32013-04-01 11:23:36 -07004377 for_each_node(node) {
4378 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4379 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4380 put_pwq_unlocked(pwq);
4381 }
4382
4383 /*
4384 * Put dfl_pwq. @wq may be freed any time after dfl_pwq is
4385 * put. Don't access it afterwards.
4386 */
4387 pwq = wq->dfl_pwq;
4388 wq->dfl_pwq = NULL;
Tejun Heodce90d42013-04-01 11:23:35 -07004389 put_pwq_unlocked(pwq);
Tejun Heo29c91e92013-03-12 11:30:03 -07004390 }
Oleg Nesterov3af244332007-05-09 02:34:09 -07004391}
4392EXPORT_SYMBOL_GPL(destroy_workqueue);
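
/*
 * Example (illustrative only, not part of this file): the usual shutdown
 * order is to stop everything that might queue new work, cancel any
 * self-requeueing items, and only then destroy the workqueue, which
 * drains whatever is still pending.  my_wq and my_poll_work are
 * hypothetical.
 *
 *	static void my_driver_exit(void)
 *	{
 *		cancel_delayed_work_sync(&my_poll_work);
 *		destroy_workqueue(my_wq);
 *		my_wq = NULL;
 *	}
 */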
4393
Tejun Heodcd989c2010-06-29 10:07:14 +02004394/**
4395 * workqueue_set_max_active - adjust max_active of a workqueue
4396 * @wq: target workqueue
4397 * @max_active: new max_active value.
4398 *
4399 * Set max_active of @wq to @max_active.
4400 *
4401 * CONTEXT:
4402 * Don't call from IRQ context.
4403 */
4404void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4405{
Tejun Heo49e3cf42013-03-12 11:29:58 -07004406 struct pool_workqueue *pwq;
Tejun Heodcd989c2010-06-29 10:07:14 +02004407
Tejun Heo8719dce2013-03-12 11:30:04 -07004408 /* disallow meddling with max_active for ordered workqueues */
Tejun Heo0a94efb2017-07-23 08:36:15 -04004409 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
Tejun Heo8719dce2013-03-12 11:30:04 -07004410 return;
4411
Tejun Heof3421792010-07-02 10:03:51 +02004412 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
Tejun Heodcd989c2010-06-29 10:07:14 +02004413
Lai Jiangshana357fc02013-03-25 16:57:19 -07004414 mutex_lock(&wq->mutex);
Tejun Heodcd989c2010-06-29 10:07:14 +02004415
Tejun Heo0a94efb2017-07-23 08:36:15 -04004416 wq->flags &= ~__WQ_ORDERED;
Tejun Heodcd989c2010-06-29 10:07:14 +02004417 wq->saved_max_active = max_active;
4418
Tejun Heo699ce092013-03-13 16:51:35 -07004419 for_each_pwq(pwq, wq)
4420 pwq_adjust_max_active(pwq);
Tejun Heodcd989c2010-06-29 10:07:14 +02004421
Lai Jiangshana357fc02013-03-25 16:57:19 -07004422 mutex_unlock(&wq->mutex);
Tejun Heodcd989c2010-06-29 10:07:14 +02004423}
4424EXPORT_SYMBOL_GPL(workqueue_set_max_active);
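
/*
 * Example (illustrative only, not part of this file): adjusting the
 * concurrency of a non-ordered workqueue at runtime.  The name and the
 * numbers are hypothetical.
 *
 *	static void my_set_turbo(struct workqueue_struct *my_wq, bool turbo)
 *	{
 *		workqueue_set_max_active(my_wq, turbo ? 16 : 4);
 *	}
 */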
4425
4426/**
Lukas Wunner27d4ee02018-02-11 10:38:28 +01004427 * current_work - retrieve %current task's work struct
4428 *
4429 * Determine if %current task is a workqueue worker and what it's working on.
4430 * Useful to find out the context that the %current task is running in.
4431 *
4432 * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
4433 */
4434struct work_struct *current_work(void)
4435{
4436 struct worker *worker = current_wq_worker();
4437
4438 return worker ? worker->current_work : NULL;
4439}
4440EXPORT_SYMBOL(current_work);
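
/*
 * Example (illustrative only, not part of this file): a helper that must
 * not wait on the work item it might itself be running from can use
 * current_work() to detect that case.  The my_dev names are hypothetical.
 *
 *	static void my_dev_quiesce(struct my_dev *dev)
 *	{
 *		if (current_work() != &dev->reset_work)
 *			cancel_work_sync(&dev->reset_work);
 *	}
 */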
4441
4442/**
Tejun Heoe62676162013-03-12 17:41:37 -07004443 * current_is_workqueue_rescuer - is %current workqueue rescuer?
4444 *
4445 * Determine whether %current is a workqueue rescuer. Can be used from
4446 * work functions to determine whether it's being run off the rescuer task.
Yacine Belkadid185af32013-07-31 14:59:24 -07004447 *
4448 * Return: %true if %current is a workqueue rescuer. %false otherwise.
Tejun Heoe62676162013-03-12 17:41:37 -07004449 */
4450bool current_is_workqueue_rescuer(void)
4451{
4452 struct worker *worker = current_wq_worker();
4453
Lai Jiangshan6a092df2013-03-20 03:28:03 +08004454 return worker && worker->rescue_wq;
Tejun Heoe62676162013-03-12 17:41:37 -07004455}
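
/*
 * A minimal sketch (illustrative): a work function that skips optional,
 * allocation-heavy processing when it has been picked up by the rescuer
 * under memory pressure.  All names are assumptions.
 */
#include <linux/workqueue.h>

static void example_work_fn(struct work_struct *work)
{
        if (current_is_workqueue_rescuer()) {
                /* running off the rescuer; do only the essential part */
                return;
        }
        /* normal, possibly allocation-heavy path */
}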
4456
4457/**
Tejun Heodcd989c2010-06-29 10:07:14 +02004458 * workqueue_congested - test whether a workqueue is congested
4459 * @cpu: CPU in question
4460 * @wq: target workqueue
4461 *
4462 * Test whether @wq's cpu workqueue for @cpu is congested. There is
4463 * no synchronization around this function and the test result is
4464 * unreliable and only useful as advisory hints or for debugging.
4465 *
Tejun Heod3251852013-05-10 11:10:17 -07004466 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
4467 * Note that both per-cpu and unbound workqueues may be associated with
4468 * multiple pool_workqueues which have separate congested states. A
4469 * workqueue being congested on one CPU doesn't mean the workqueue is also
4470 * congested on other CPUs / NUMA nodes.
4471 *
Yacine Belkadid185af32013-07-31 14:59:24 -07004472 * Return:
Tejun Heodcd989c2010-06-29 10:07:14 +02004473 * %true if congested, %false otherwise.
4474 */
Tejun Heod84ff052013-03-12 11:29:59 -07004475bool workqueue_congested(int cpu, struct workqueue_struct *wq)
Tejun Heodcd989c2010-06-29 10:07:14 +02004476{
Tejun Heo7fb98ea2013-03-12 11:30:00 -07004477 struct pool_workqueue *pwq;
Tejun Heo76af4d92013-03-12 11:30:00 -07004478 bool ret;
4479
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004480 rcu_read_lock();
4481 preempt_disable();
Tejun Heo7fb98ea2013-03-12 11:30:00 -07004482
Tejun Heod3251852013-05-10 11:10:17 -07004483 if (cpu == WORK_CPU_UNBOUND)
4484 cpu = smp_processor_id();
4485
Tejun Heo7fb98ea2013-03-12 11:30:00 -07004486 if (!(wq->flags & WQ_UNBOUND))
4487 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4488 else
Tejun Heodf2d5ae2013-04-01 11:23:35 -07004489 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
Tejun Heodcd989c2010-06-29 10:07:14 +02004490
Tejun Heo76af4d92013-03-12 11:30:00 -07004491 ret = !list_empty(&pwq->delayed_works);
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004492 preempt_enable();
4493 rcu_read_unlock();
Tejun Heo76af4d92013-03-12 11:30:00 -07004494
4495 return ret;
Tejun Heodcd989c2010-06-29 10:07:14 +02004496}
4497EXPORT_SYMBOL_GPL(workqueue_congested);
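
/*
 * A minimal sketch (illustrative): a producer uses workqueue_congested()
 * as an advisory hint and sheds load instead of queueing more work on the
 * local CPU.  example_wq and example_work are assumptions.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static bool example_try_queue(void)
{
        /* advisory only; the result may be stale by the time we queue */
        if (workqueue_congested(WORK_CPU_UNBOUND, example_wq))
                return false;
        return queue_work(example_wq, &example_work);
}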
4498
4499/**
Tejun Heodcd989c2010-06-29 10:07:14 +02004500 * work_busy - test whether a work is currently pending or running
4501 * @work: the work to be tested
4502 *
4503 * Test whether @work is currently pending or running. There is no
4504 * synchronization around this function and the test result is
4505 * unreliable and only useful as advisory hints or for debugging.
Tejun Heodcd989c2010-06-29 10:07:14 +02004506 *
Yacine Belkadid185af32013-07-31 14:59:24 -07004507 * Return:
Tejun Heodcd989c2010-06-29 10:07:14 +02004508 * OR'd bitmask of WORK_BUSY_* bits.
4509 */
4510unsigned int work_busy(struct work_struct *work)
4511{
Tejun Heofa1b54e2013-03-12 11:30:00 -07004512 struct worker_pool *pool;
Tejun Heodcd989c2010-06-29 10:07:14 +02004513 unsigned long flags;
4514 unsigned int ret = 0;
4515
Tejun Heodcd989c2010-06-29 10:07:14 +02004516 if (work_pending(work))
4517 ret |= WORK_BUSY_PENDING;
Tejun Heodcd989c2010-06-29 10:07:14 +02004518
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004519 rcu_read_lock();
Tejun Heofa1b54e2013-03-12 11:30:00 -07004520 pool = get_work_pool(work);
Lai Jiangshan038366c2013-02-06 18:04:53 -08004521 if (pool) {
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004522 spin_lock_irqsave(&pool->lock, flags);
Lai Jiangshan038366c2013-02-06 18:04:53 -08004523 if (find_worker_executing_work(pool, work))
4524 ret |= WORK_BUSY_RUNNING;
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004525 spin_unlock_irqrestore(&pool->lock, flags);
Lai Jiangshan038366c2013-02-06 18:04:53 -08004526 }
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004527 rcu_read_unlock();
Tejun Heodcd989c2010-06-29 10:07:14 +02004528
4529 return ret;
4530}
4531EXPORT_SYMBOL_GPL(work_busy);
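
/*
 * A minimal sketch (illustrative): debug code reports an advisory snapshot
 * of whether a work item is pending and/or running.  example_work is an
 * assumption.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

static struct work_struct example_work;

static void example_report_busy(void)
{
        unsigned int busy = work_busy(&example_work);

        pr_info("example_work: pending=%d running=%d\n",
                !!(busy & WORK_BUSY_PENDING), !!(busy & WORK_BUSY_RUNNING));
}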
4532
Tejun Heo3d1cb202013-04-30 15:27:22 -07004533/**
4534 * set_worker_desc - set description for the current work item
4535 * @fmt: printf-style format string
4536 * @...: arguments for the format string
4537 *
4538 * This function can be called by a running work function to describe what
4539 * the work item is about. If the worker task gets dumped, this
4540 * information will be printed out together with the task dump to help debugging. The
4541 * description can be at most WORKER_DESC_LEN including the trailing '\0'.
4542 */
4543void set_worker_desc(const char *fmt, ...)
4544{
4545 struct worker *worker = current_wq_worker();
4546 va_list args;
4547
4548 if (worker) {
4549 va_start(args, fmt);
4550 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4551 va_end(args);
Tejun Heo3d1cb202013-04-30 15:27:22 -07004552 }
4553}
Steffen Maier5c750d52018-05-17 19:14:57 +02004554EXPORT_SYMBOL_GPL(set_worker_desc);
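
/*
 * A minimal sketch (illustrative): a work function tags the executing
 * worker with the object it is processing so task dumps are easier to
 * attribute.  struct example_req and its fields are assumptions.
 */
#include <linux/kernel.h>
#include <linux/workqueue.h>

struct example_req {
        struct work_struct work;
        int id;
};

static void example_req_work_fn(struct work_struct *work)
{
        struct example_req *req = container_of(work, struct example_req, work);

        set_worker_desc("example_req %d", req->id);
        /* ... process the request ... */
}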
Tejun Heo3d1cb202013-04-30 15:27:22 -07004555
4556/**
4557 * print_worker_info - print out worker information and description
4558 * @log_lvl: the log level to use when printing
4559 * @task: target task
4560 *
4561 * If @task is a worker and currently executing a work item, print out the
4562 * name of the workqueue being serviced and worker description set with
4563 * set_worker_desc() by the currently executing work item.
4564 *
4565 * This function can be safely called on any task as long as the
4566 * task_struct itself is accessible. While safe, this function isn't
4567 * synchronized and may print out mixed-up or garbage data of limited length.
4568 */
4569void print_worker_info(const char *log_lvl, struct task_struct *task)
4570{
4571 work_func_t *fn = NULL;
4572 char name[WQ_NAME_LEN] = { };
4573 char desc[WORKER_DESC_LEN] = { };
4574 struct pool_workqueue *pwq = NULL;
4575 struct workqueue_struct *wq = NULL;
Tejun Heo3d1cb202013-04-30 15:27:22 -07004576 struct worker *worker;
4577
4578 if (!(task->flags & PF_WQ_WORKER))
4579 return;
4580
4581 /*
4582 * This function is called without any synchronization and @task
4583 * could be in any state. Be careful with dereferences.
4584 */
Petr Mladeke7005912016-10-11 13:55:17 -07004585 worker = kthread_probe_data(task);
Tejun Heo3d1cb202013-04-30 15:27:22 -07004586
4587 /*
Tejun Heo8bf89592018-05-18 08:47:13 -07004588 * Carefully copy the associated workqueue's workfn, name and desc.
4589 * Keep the original last '\0' in case the original is garbage.
Tejun Heo3d1cb202013-04-30 15:27:22 -07004590 */
4591 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4592 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4593 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4594 probe_kernel_read(name, wq->name, sizeof(name) - 1);
Tejun Heo8bf89592018-05-18 08:47:13 -07004595 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
Tejun Heo3d1cb202013-04-30 15:27:22 -07004596
4597 if (fn || name[0] || desc[0]) {
4598 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
Tejun Heo8bf89592018-05-18 08:47:13 -07004599 if (strcmp(name, desc))
Tejun Heo3d1cb202013-04-30 15:27:22 -07004600 pr_cont(" (%s)", desc);
4601 pr_cont("\n");
4602 }
4603}
4604
Tejun Heo3494fc32015-03-09 09:22:28 -04004605static void pr_cont_pool_info(struct worker_pool *pool)
4606{
4607 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4608 if (pool->node != NUMA_NO_NODE)
4609 pr_cont(" node=%d", pool->node);
4610 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4611}
4612
4613static void pr_cont_work(bool comma, struct work_struct *work)
4614{
4615 if (work->func == wq_barrier_func) {
4616 struct wq_barrier *barr;
4617
4618 barr = container_of(work, struct wq_barrier, work);
4619
4620 pr_cont("%s BAR(%d)", comma ? "," : "",
4621 task_pid_nr(barr->task));
4622 } else {
4623 pr_cont("%s %pf", comma ? "," : "", work->func);
4624 }
4625}
4626
4627static void show_pwq(struct pool_workqueue *pwq)
4628{
4629 struct worker_pool *pool = pwq->pool;
4630 struct work_struct *work;
4631 struct worker *worker;
4632 bool has_in_flight = false, has_pending = false;
4633 int bkt;
4634
4635 pr_info(" pwq %d:", pool->id);
4636 pr_cont_pool_info(pool);
4637
4638 pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
4639 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4640
4641 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4642 if (worker->current_pwq == pwq) {
4643 has_in_flight = true;
4644 break;
4645 }
4646 }
4647 if (has_in_flight) {
4648 bool comma = false;
4649
4650 pr_info(" in-flight:");
4651 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4652 if (worker->current_pwq != pwq)
4653 continue;
4654
4655 pr_cont("%s %d%s:%pf", comma ? "," : "",
4656 task_pid_nr(worker->task),
4657 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4658 worker->current_func);
4659 list_for_each_entry(work, &worker->scheduled, entry)
4660 pr_cont_work(false, work);
4661 comma = true;
4662 }
4663 pr_cont("\n");
4664 }
4665
4666 list_for_each_entry(work, &pool->worklist, entry) {
4667 if (get_work_pwq(work) == pwq) {
4668 has_pending = true;
4669 break;
4670 }
4671 }
4672 if (has_pending) {
4673 bool comma = false;
4674
4675 pr_info(" pending:");
4676 list_for_each_entry(work, &pool->worklist, entry) {
4677 if (get_work_pwq(work) != pwq)
4678 continue;
4679
4680 pr_cont_work(comma, work);
4681 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4682 }
4683 pr_cont("\n");
4684 }
4685
4686 if (!list_empty(&pwq->delayed_works)) {
4687 bool comma = false;
4688
4689 pr_info(" delayed:");
4690 list_for_each_entry(work, &pwq->delayed_works, entry) {
4691 pr_cont_work(comma, work);
4692 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4693 }
4694 pr_cont("\n");
4695 }
4696}
4697
4698/**
4699 * show_workqueue_state - dump workqueue state
4700 *
Roger Lu7b776af2016-07-01 11:05:02 +08004701 * Called from a sysrq handler or try_to_freeze_tasks() and prints out
4702 * all busy workqueues and pools.
Tejun Heo3494fc32015-03-09 09:22:28 -04004703 */
4704void show_workqueue_state(void)
4705{
4706 struct workqueue_struct *wq;
4707 struct worker_pool *pool;
4708 unsigned long flags;
4709 int pi;
4710
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004711 rcu_read_lock();
Tejun Heo3494fc32015-03-09 09:22:28 -04004712
4713 pr_info("Showing busy workqueues and worker pools:\n");
4714
4715 list_for_each_entry_rcu(wq, &workqueues, list) {
4716 struct pool_workqueue *pwq;
4717 bool idle = true;
4718
4719 for_each_pwq(pwq, wq) {
4720 if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
4721 idle = false;
4722 break;
4723 }
4724 }
4725 if (idle)
4726 continue;
4727
4728 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4729
4730 for_each_pwq(pwq, wq) {
4731 spin_lock_irqsave(&pwq->pool->lock, flags);
4732 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4733 show_pwq(pwq);
4734 spin_unlock_irqrestore(&pwq->pool->lock, flags);
Sergey Senozhatsky62635ea2018-01-11 09:53:35 +09004735 /*
4736 * We could be printing a lot from atomic context, e.g.
4737 * sysrq-t -> show_workqueue_state(). Avoid triggering
4738 * hard lockup.
4739 */
4740 touch_nmi_watchdog();
Tejun Heo3494fc32015-03-09 09:22:28 -04004741 }
4742 }
4743
4744 for_each_pool(pool, pi) {
4745 struct worker *worker;
4746 bool first = true;
4747
4748 spin_lock_irqsave(&pool->lock, flags);
4749 if (pool->nr_workers == pool->nr_idle)
4750 goto next_pool;
4751
4752 pr_info("pool %d:", pool->id);
4753 pr_cont_pool_info(pool);
Tejun Heo82607adc2015-12-08 11:28:04 -05004754 pr_cont(" hung=%us workers=%d",
4755 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4756 pool->nr_workers);
Tejun Heo3494fc32015-03-09 09:22:28 -04004757 if (pool->manager)
4758 pr_cont(" manager: %d",
4759 task_pid_nr(pool->manager->task));
4760 list_for_each_entry(worker, &pool->idle_list, entry) {
4761 pr_cont(" %s%d", first ? "idle: " : "",
4762 task_pid_nr(worker->task));
4763 first = false;
4764 }
4765 pr_cont("\n");
4766 next_pool:
4767 spin_unlock_irqrestore(&pool->lock, flags);
Sergey Senozhatsky62635ea2018-01-11 09:53:35 +09004768 /*
4769 * We could be printing a lot from atomic context, e.g.
4770 * sysrq-t -> show_workqueue_state(). Avoid triggering
4771 * hard lockup.
4772 */
4773 touch_nmi_watchdog();
Tejun Heo3494fc32015-03-09 09:22:28 -04004774 }
4775
Thomas Gleixner24acfb72019-03-13 17:55:47 +01004776 rcu_read_unlock();
Tejun Heo3494fc32015-03-09 09:22:28 -04004777}
4778
Tejun Heo6b598082018-05-18 08:47:13 -07004779/* used to show worker information through /proc/PID/{comm,stat,status} */
4780void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
4781{
Tejun Heo6b598082018-05-18 08:47:13 -07004782 int off;
4783
4784 /* always show the actual comm */
4785 off = strscpy(buf, task->comm, size);
4786 if (off < 0)
4787 return;
4788
Tejun Heo197f6ac2018-05-21 08:04:35 -07004789 /* stabilize PF_WQ_WORKER and worker pool association */
Tejun Heo6b598082018-05-18 08:47:13 -07004790 mutex_lock(&wq_pool_attach_mutex);
4791
Tejun Heo197f6ac2018-05-21 08:04:35 -07004792 if (task->flags & PF_WQ_WORKER) {
4793 struct worker *worker = kthread_data(task);
4794 struct worker_pool *pool = worker->pool;
Tejun Heo6b598082018-05-18 08:47:13 -07004795
Tejun Heo197f6ac2018-05-21 08:04:35 -07004796 if (pool) {
4797 spin_lock_irq(&pool->lock);
4798 /*
4799 * ->desc tracks information (wq name or
4800 * set_worker_desc()) for the latest execution. If
4801 * current, prepend '+', otherwise '-'.
4802 */
4803 if (worker->desc[0] != '\0') {
4804 if (worker->current_work)
4805 scnprintf(buf + off, size - off, "+%s",
4806 worker->desc);
4807 else
4808 scnprintf(buf + off, size - off, "-%s",
4809 worker->desc);
4810 }
4811 spin_unlock_irq(&pool->lock);
Tejun Heo6b598082018-05-18 08:47:13 -07004812 }
Tejun Heo6b598082018-05-18 08:47:13 -07004813 }
4814
4815 mutex_unlock(&wq_pool_attach_mutex);
4816}
4817
Mathieu Malaterre66448bc2018-05-22 21:47:32 +02004818#ifdef CONFIG_SMP
4819
Tejun Heodb7bccf2010-06-29 10:07:12 +02004820/*
4821 * CPU hotplug.
4822 *
Tejun Heoe22bee72010-06-29 10:07:14 +02004823 * There are two challenges in supporting CPU hotplug. Firstly, there
Tejun Heo112202d2013-02-13 19:29:12 -08004824 * are a lot of assumptions on strong associations among work, pwq and
Tejun Heo706026c2013-01-24 11:01:34 -08004825 * pool which make migrating pending and scheduled works very
Tejun Heoe22bee72010-06-29 10:07:14 +02004826 * difficult to implement without impacting hot paths. Secondly,
Tejun Heo94cf58b2013-01-24 11:01:33 -08004827 * worker pools serve a mix of short, long and very long running works, making
Tejun Heoe22bee72010-06-29 10:07:14 +02004828 * blocked draining impractical.
4829 *
Tejun Heo24647572013-01-24 11:01:33 -08004830 * This is solved by allowing the pools to be disassociated from the CPU
Tejun Heo628c78e2012-07-17 12:39:27 -07004831 * and run as unbound ones, and by allowing them to be reattached later if
 4832 * the CPU comes back online.
Tejun Heodb7bccf2010-06-29 10:07:12 +02004833 */
4834
Lai Jiangshane8b3f8d2017-12-01 22:20:36 +08004835static void unbind_workers(int cpu)
Tejun Heodb7bccf2010-06-29 10:07:12 +02004836{
Tejun Heo4ce62e92012-07-13 22:16:44 -07004837 struct worker_pool *pool;
Tejun Heodb7bccf2010-06-29 10:07:12 +02004838 struct worker *worker;
Tejun Heodb7bccf2010-06-29 10:07:12 +02004839
Tejun Heof02ae732013-03-12 11:30:03 -07004840 for_each_cpu_worker_pool(pool, cpu) {
Tejun Heo1258fae2018-05-18 08:47:13 -07004841 mutex_lock(&wq_pool_attach_mutex);
Tejun Heo94cf58b2013-01-24 11:01:33 -08004842 spin_lock_irq(&pool->lock);
4843
4844 /*
Lai Jiangshan92f9c5c2014-05-20 17:46:34 +08004845 * We've blocked all attach/detach operations. Make all workers
Tejun Heo94cf58b2013-01-24 11:01:33 -08004846 * unbound and set DISASSOCIATED. Before this, all workers
4847 * except for the ones which are still executing works from
4848 * before the last CPU down must be on the CPU. After
 4849 * this, they may be migrated anywhere.
4850 */
Lai Jiangshanda028462014-05-20 17:46:31 +08004851 for_each_pool_worker(worker, pool)
Tejun Heoc9e7cf22013-01-24 11:01:33 -08004852 worker->flags |= WORKER_UNBOUND;
Tejun Heodb7bccf2010-06-29 10:07:12 +02004853
Tejun Heo24647572013-01-24 11:01:33 -08004854 pool->flags |= POOL_DISASSOCIATED;
Tejun Heof2d5a0e2012-07-17 12:39:26 -07004855
Tejun Heo94cf58b2013-01-24 11:01:33 -08004856 spin_unlock_irq(&pool->lock);
Tejun Heo1258fae2018-05-18 08:47:13 -07004857 mutex_unlock(&wq_pool_attach_mutex);
Tejun Heoe22bee72010-06-29 10:07:14 +02004858
Lai Jiangshaneb283422013-03-08 15:18:28 -08004859 /*
4860 * Call schedule() so that we cross rq->lock and thus can
4861 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
4862 * This is necessary as scheduler callbacks may be invoked
4863 * from other cpus.
4864 */
4865 schedule();
Tejun Heo628c78e2012-07-17 12:39:27 -07004866
Lai Jiangshaneb283422013-03-08 15:18:28 -08004867 /*
4868 * Sched callbacks are disabled now. Zap nr_running.
4869 * After this, nr_running stays zero and need_more_worker()
4870 * and keep_working() are always true as long as the
4871 * worklist is not empty. This pool now behaves as an
4872 * unbound (in terms of concurrency management) pool which
4873 * are served by workers tied to the pool.
4874 */
Tejun Heoe19e3972013-01-24 11:39:44 -08004875 atomic_set(&pool->nr_running, 0);
Lai Jiangshaneb283422013-03-08 15:18:28 -08004876
4877 /*
4878 * With concurrency management just turned off, a busy
4879 * worker blocking could lead to lengthy stalls. Kick off
4880 * unbound chain execution of currently pending work items.
4881 */
4882 spin_lock_irq(&pool->lock);
4883 wake_up_worker(pool);
4884 spin_unlock_irq(&pool->lock);
4885 }
Tejun Heodb7bccf2010-06-29 10:07:12 +02004886}
4887
Tejun Heobd7c0892013-03-19 13:45:21 -07004888/**
4889 * rebind_workers - rebind all workers of a pool to the associated CPU
4890 * @pool: pool of interest
4891 *
Tejun Heoa9ab7752013-03-19 13:45:21 -07004892 * @pool->cpu is coming online. Rebind all workers to the CPU.
Tejun Heobd7c0892013-03-19 13:45:21 -07004893 */
4894static void rebind_workers(struct worker_pool *pool)
4895{
Tejun Heoa9ab7752013-03-19 13:45:21 -07004896 struct worker *worker;
Tejun Heobd7c0892013-03-19 13:45:21 -07004897
Tejun Heo1258fae2018-05-18 08:47:13 -07004898 lockdep_assert_held(&wq_pool_attach_mutex);
Tejun Heobd7c0892013-03-19 13:45:21 -07004899
Tejun Heoa9ab7752013-03-19 13:45:21 -07004900 /*
4901 * Restore CPU affinity of all workers. As all idle workers should
4902 * be on the run-queue of the associated CPU before any local
Shailendra Verma402dd892015-05-23 10:38:14 +05304903 * wake-ups for concurrency management happen, restore CPU affinity
Tejun Heoa9ab7752013-03-19 13:45:21 -07004904 * of all workers first and then clear UNBOUND. As we're called
4905 * from CPU_ONLINE, the following shouldn't fail.
4906 */
Lai Jiangshanda028462014-05-20 17:46:31 +08004907 for_each_pool_worker(worker, pool)
Tejun Heoa9ab7752013-03-19 13:45:21 -07004908 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4909 pool->attrs->cpumask) < 0);
4910
4911 spin_lock_irq(&pool->lock);
Wanpeng Lif7c17d22016-05-11 17:55:18 +08004912
Lai Jiangshan3de5e882014-06-03 15:33:27 +08004913 pool->flags &= ~POOL_DISASSOCIATED;
Tejun Heoa9ab7752013-03-19 13:45:21 -07004914
Lai Jiangshanda028462014-05-20 17:46:31 +08004915 for_each_pool_worker(worker, pool) {
Tejun Heoa9ab7752013-03-19 13:45:21 -07004916 unsigned int worker_flags = worker->flags;
Tejun Heobd7c0892013-03-19 13:45:21 -07004917
4918 /*
Tejun Heoa9ab7752013-03-19 13:45:21 -07004919 * A bound idle worker should actually be on the runqueue
4920 * of the associated CPU for local wake-ups targeting it to
4921 * work. Kick all idle workers so that they migrate to the
4922 * associated CPU. Doing this in the same loop as
4923 * replacing UNBOUND with REBOUND is safe as no worker will
4924 * be bound before @pool->lock is released.
Tejun Heobd7c0892013-03-19 13:45:21 -07004925 */
Tejun Heoa9ab7752013-03-19 13:45:21 -07004926 if (worker_flags & WORKER_IDLE)
4927 wake_up_process(worker->task);
4928
4929 /*
4930 * We want to clear UNBOUND but can't directly call
4931 * worker_clr_flags() or adjust nr_running. Atomically
4932 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
4933 * @worker will clear REBOUND using worker_clr_flags() when
4934 * it initiates the next execution cycle thus restoring
4935 * concurrency management. Note that when or whether
4936 * @worker clears REBOUND doesn't affect correctness.
4937 *
Mark Rutlandc95491e2017-10-23 14:07:22 -07004938 * WRITE_ONCE() is necessary because @worker->flags may be
Tejun Heoa9ab7752013-03-19 13:45:21 -07004939 * tested without holding any lock in
4940 * wq_worker_waking_up(). Without it, NOT_RUNNING test may
4941 * fail incorrectly leading to premature concurrency
4942 * management operations.
4943 */
4944 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4945 worker_flags |= WORKER_REBOUND;
4946 worker_flags &= ~WORKER_UNBOUND;
Mark Rutlandc95491e2017-10-23 14:07:22 -07004947 WRITE_ONCE(worker->flags, worker_flags);
Tejun Heobd7c0892013-03-19 13:45:21 -07004948 }
4949
Tejun Heoa9ab7752013-03-19 13:45:21 -07004950 spin_unlock_irq(&pool->lock);
Tejun Heobd7c0892013-03-19 13:45:21 -07004951}
4952
Tejun Heo7dbc7252013-03-19 13:45:21 -07004953/**
4954 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
4955 * @pool: unbound pool of interest
4956 * @cpu: the CPU which is coming up
4957 *
4958 * An unbound pool may end up with a cpumask which doesn't have any online
4959 * CPUs. When a worker of such a pool gets scheduled, the scheduler resets
4960 * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any
4961 * online CPU before, cpus_allowed of all its workers should be restored.
4962 */
4963static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4964{
4965 static cpumask_t cpumask;
4966 struct worker *worker;
Tejun Heo7dbc7252013-03-19 13:45:21 -07004967
Tejun Heo1258fae2018-05-18 08:47:13 -07004968 lockdep_assert_held(&wq_pool_attach_mutex);
Tejun Heo7dbc7252013-03-19 13:45:21 -07004969
4970 /* is @cpu allowed for @pool? */
4971 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4972 return;
4973
Tejun Heo7dbc7252013-03-19 13:45:21 -07004974 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
Tejun Heo7dbc7252013-03-19 13:45:21 -07004975
4976 /* as we're called from CPU_ONLINE, the following shouldn't fail */
Lai Jiangshanda028462014-05-20 17:46:31 +08004977 for_each_pool_worker(worker, pool)
Peter Zijlstrad945b5e2016-06-16 14:38:42 +02004978 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
Tejun Heo7dbc7252013-03-19 13:45:21 -07004979}
4980
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00004981int workqueue_prepare_cpu(unsigned int cpu)
Oleg Nesterov3af244332007-05-09 02:34:09 -07004982{
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00004983 struct worker_pool *pool;
4984
4985 for_each_cpu_worker_pool(pool, cpu) {
4986 if (pool->nr_workers)
4987 continue;
4988 if (!create_worker(pool))
4989 return -ENOMEM;
4990 }
4991 return 0;
4992}
4993
4994int workqueue_online_cpu(unsigned int cpu)
4995{
Tejun Heo4ce62e92012-07-13 22:16:44 -07004996 struct worker_pool *pool;
Tejun Heo4c16bd32013-04-01 11:23:36 -07004997 struct workqueue_struct *wq;
Tejun Heo7dbc7252013-03-19 13:45:21 -07004998 int pi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004999
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005000 mutex_lock(&wq_pool_mutex);
Oleg Nesterov00dfcaf2008-04-29 01:00:27 -07005001
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005002 for_each_pool(pool, pi) {
Tejun Heo1258fae2018-05-18 08:47:13 -07005003 mutex_lock(&wq_pool_attach_mutex);
Tejun Heo7dbc7252013-03-19 13:45:21 -07005004
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005005 if (pool->cpu == cpu)
5006 rebind_workers(pool);
5007 else if (pool->cpu < 0)
5008 restore_unbound_workers_cpumask(pool, cpu);
Tejun Heo94cf58b2013-01-24 11:01:33 -08005009
Tejun Heo1258fae2018-05-18 08:47:13 -07005010 mutex_unlock(&wq_pool_attach_mutex);
Tejun Heo65758202012-07-17 12:39:26 -07005011 }
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005012
5013 /* update NUMA affinity of unbound workqueues */
5014 list_for_each_entry(wq, &workqueues, list)
5015 wq_update_unbound_numa(wq, cpu, true);
5016
5017 mutex_unlock(&wq_pool_mutex);
5018 return 0;
Tejun Heo65758202012-07-17 12:39:26 -07005019}
5020
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005021int workqueue_offline_cpu(unsigned int cpu)
Tejun Heo65758202012-07-17 12:39:26 -07005022{
Tejun Heo4c16bd32013-04-01 11:23:36 -07005023 struct workqueue_struct *wq;
Tejun Heo8db25e72012-07-17 12:39:28 -07005024
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005025 /* unbinding per-cpu workers should happen on the local CPU */
Lai Jiangshane8b3f8d2017-12-01 22:20:36 +08005026 if (WARN_ON(cpu != smp_processor_id()))
5027 return -1;
5028
5029 unbind_workers(cpu);
Tejun Heo4c16bd32013-04-01 11:23:36 -07005030
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005031 /* update NUMA affinity of unbound workqueues */
5032 mutex_lock(&wq_pool_mutex);
5033 list_for_each_entry(wq, &workqueues, list)
5034 wq_update_unbound_numa(wq, cpu, false);
5035 mutex_unlock(&wq_pool_mutex);
Tejun Heo4c16bd32013-04-01 11:23:36 -07005036
Thomas Gleixner7ee681b2016-07-13 17:16:29 +00005037 return 0;
Tejun Heo65758202012-07-17 12:39:26 -07005038}
5039
Rusty Russell2d3854a2008-11-05 13:39:10 +11005040struct work_for_cpu {
Tejun Heoed48ece2012-09-18 12:48:43 -07005041 struct work_struct work;
Rusty Russell2d3854a2008-11-05 13:39:10 +11005042 long (*fn)(void *);
5043 void *arg;
5044 long ret;
5045};
5046
Tejun Heoed48ece2012-09-18 12:48:43 -07005047static void work_for_cpu_fn(struct work_struct *work)
Rusty Russell2d3854a2008-11-05 13:39:10 +11005048{
Tejun Heoed48ece2012-09-18 12:48:43 -07005049 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5050
Rusty Russell2d3854a2008-11-05 13:39:10 +11005051 wfc->ret = wfc->fn(wfc->arg);
5052}
5053
5054/**
Anna-Maria Gleixner22aceb32016-03-10 12:07:38 +01005055 * work_on_cpu - run a function in thread context on a particular cpu
Rusty Russell2d3854a2008-11-05 13:39:10 +11005056 * @cpu: the cpu to run on
5057 * @fn: the function to run
5058 * @arg: the function arg
5059 *
Rusty Russell31ad9082009-01-16 15:31:15 -08005060 * It is up to the caller to ensure that the cpu doesn't go offline.
Andrew Morton6b44003e2009-04-09 09:50:37 -06005061 * The caller must not hold any locks which would prevent @fn from completing.
Yacine Belkadid185af32013-07-31 14:59:24 -07005062 *
5063 * Return: The value @fn returns.
Rusty Russell2d3854a2008-11-05 13:39:10 +11005064 */
Tejun Heod84ff052013-03-12 11:29:59 -07005065long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
Rusty Russell2d3854a2008-11-05 13:39:10 +11005066{
Tejun Heoed48ece2012-09-18 12:48:43 -07005067 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
Rusty Russell2d3854a2008-11-05 13:39:10 +11005068
Tejun Heoed48ece2012-09-18 12:48:43 -07005069 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
5070 schedule_work_on(cpu, &wfc.work);
Bjorn Helgaas12997d12013-11-18 11:00:29 -07005071 flush_work(&wfc.work);
Chuansheng Liu440a1132014-01-11 22:26:33 -05005072 destroy_work_on_stack(&wfc.work);
Rusty Russell2d3854a2008-11-05 13:39:10 +11005073 return wfc.ret;
5074}
5075EXPORT_SYMBOL_GPL(work_on_cpu);
Thomas Gleixner0e8d6a92017-04-12 22:07:28 +02005076
5077/**
5078 * work_on_cpu_safe - run a function in thread context on a particular cpu
5079 * @cpu: the cpu to run on
5080 * @fn: the function to run
5081 * @arg: the function argument
5082 *
5083 * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
5084 * any locks which would prevent @fn from completing.
5085 *
5086 * Return: The value @fn returns.
5087 */
5088long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
5089{
5090 long ret = -ENODEV;
5091
5092 get_online_cpus();
5093 if (cpu_online(cpu))
5094 ret = work_on_cpu(cpu, fn, arg);
5095 put_online_cpus();
5096 return ret;
5097}
5098EXPORT_SYMBOL_GPL(work_on_cpu_safe);
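
/*
 * A minimal sketch (illustrative): run a function on a specific CPU in
 * thread context with CPU hotplug excluded.  example_read_state() and the
 * way the CPU number is obtained are assumptions.
 */
#include <linux/workqueue.h>

static long example_read_state(void *arg)
{
        /* runs in a worker bound to the requested CPU */
        return 0;
}

static long example_query_cpu(int cpu)
{
        /* work_on_cpu_safe() wraps work_on_cpu() in get/put_online_cpus() */
        return work_on_cpu_safe(cpu, example_read_state, NULL);
}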
Rusty Russell2d3854a2008-11-05 13:39:10 +11005099#endif /* CONFIG_SMP */
5100
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005101#ifdef CONFIG_FREEZER
Rusty Russelle7577c52009-01-01 10:12:25 +10305102
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005103/**
5104 * freeze_workqueues_begin - begin freezing workqueues
5105 *
Tejun Heo58a69cb2011-02-16 09:25:31 +01005106 * Start freezing workqueues. After this function returns, all freezable
Tejun Heoc5aa87b2013-03-13 16:51:36 -07005107 * workqueues will queue new works to their delayed_works list instead of
Tejun Heo706026c2013-01-24 11:01:34 -08005108 * pool->worklist.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005109 *
5110 * CONTEXT:
Lai Jiangshana357fc02013-03-25 16:57:19 -07005111 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005112 */
5113void freeze_workqueues_begin(void)
5114{
Tejun Heo24b8a842013-03-12 11:29:58 -07005115 struct workqueue_struct *wq;
5116 struct pool_workqueue *pwq;
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005117
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005118 mutex_lock(&wq_pool_mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005119
Tejun Heo6183c002013-03-12 11:29:57 -07005120 WARN_ON_ONCE(workqueue_freezing);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005121 workqueue_freezing = true;
5122
Tejun Heo24b8a842013-03-12 11:29:58 -07005123 list_for_each_entry(wq, &workqueues, list) {
Lai Jiangshana357fc02013-03-25 16:57:19 -07005124 mutex_lock(&wq->mutex);
Tejun Heo699ce092013-03-13 16:51:35 -07005125 for_each_pwq(pwq, wq)
5126 pwq_adjust_max_active(pwq);
Lai Jiangshana357fc02013-03-25 16:57:19 -07005127 mutex_unlock(&wq->mutex);
Tejun Heo24b8a842013-03-12 11:29:58 -07005128 }
Tejun Heo5bcab332013-03-13 19:47:40 -07005129
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005130 mutex_unlock(&wq_pool_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005131}
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005132
5133/**
Tejun Heo58a69cb2011-02-16 09:25:31 +01005134 * freeze_workqueues_busy - are freezable workqueues still busy?
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005135 *
5136 * Check whether freezing is complete. This function must be called
5137 * between freeze_workqueues_begin() and thaw_workqueues().
5138 *
5139 * CONTEXT:
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005140 * Grabs and releases wq_pool_mutex.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005141 *
Yacine Belkadid185af32013-07-31 14:59:24 -07005142 * Return:
Tejun Heo58a69cb2011-02-16 09:25:31 +01005143 * %true if some freezable workqueues are still busy. %false if freezing
5144 * is complete.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005145 */
5146bool freeze_workqueues_busy(void)
5147{
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005148 bool busy = false;
Tejun Heo24b8a842013-03-12 11:29:58 -07005149 struct workqueue_struct *wq;
5150 struct pool_workqueue *pwq;
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005151
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005152 mutex_lock(&wq_pool_mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005153
Tejun Heo6183c002013-03-12 11:29:57 -07005154 WARN_ON_ONCE(!workqueue_freezing);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005155
Tejun Heo24b8a842013-03-12 11:29:58 -07005156 list_for_each_entry(wq, &workqueues, list) {
5157 if (!(wq->flags & WQ_FREEZABLE))
5158 continue;
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005159 /*
5160 * nr_active is monotonically decreasing. It's safe
5161 * to peek without lock.
5162 */
Thomas Gleixner24acfb72019-03-13 17:55:47 +01005163 rcu_read_lock();
Tejun Heo24b8a842013-03-12 11:29:58 -07005164 for_each_pwq(pwq, wq) {
Tejun Heo6183c002013-03-12 11:29:57 -07005165 WARN_ON_ONCE(pwq->nr_active < 0);
Tejun Heo112202d2013-02-13 19:29:12 -08005166 if (pwq->nr_active) {
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005167 busy = true;
Thomas Gleixner24acfb72019-03-13 17:55:47 +01005168 rcu_read_unlock();
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005169 goto out_unlock;
5170 }
5171 }
Thomas Gleixner24acfb72019-03-13 17:55:47 +01005172 rcu_read_unlock();
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005173 }
5174out_unlock:
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005175 mutex_unlock(&wq_pool_mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005176 return busy;
5177}
5178
5179/**
5180 * thaw_workqueues - thaw workqueues
5181 *
5182 * Thaw workqueues. Normal queueing is restored and all collected
Tejun Heo706026c2013-01-24 11:01:34 -08005183 * frozen works are transferred to their respective pool worklists.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005184 *
5185 * CONTEXT:
Lai Jiangshana357fc02013-03-25 16:57:19 -07005186 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005187 */
5188void thaw_workqueues(void)
5189{
Tejun Heo24b8a842013-03-12 11:29:58 -07005190 struct workqueue_struct *wq;
5191 struct pool_workqueue *pwq;
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005192
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005193 mutex_lock(&wq_pool_mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005194
5195 if (!workqueue_freezing)
5196 goto out_unlock;
5197
Lai Jiangshan74b414e2014-05-22 19:01:16 +08005198 workqueue_freezing = false;
Tejun Heo8b03ae32010-06-29 10:07:12 +02005199
Tejun Heo24b8a842013-03-12 11:29:58 -07005200 /* restore max_active and repopulate worklist */
5201 list_for_each_entry(wq, &workqueues, list) {
Lai Jiangshana357fc02013-03-25 16:57:19 -07005202 mutex_lock(&wq->mutex);
Tejun Heo699ce092013-03-13 16:51:35 -07005203 for_each_pwq(pwq, wq)
5204 pwq_adjust_max_active(pwq);
Lai Jiangshana357fc02013-03-25 16:57:19 -07005205 mutex_unlock(&wq->mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005206 }
5207
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005208out_unlock:
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005209 mutex_unlock(&wq_pool_mutex);
Tejun Heoa0a1a5f2010-06-29 10:07:12 +02005210}
5211#endif /* CONFIG_FREEZER */
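
/*
 * A minimal sketch (illustrative): a driver whose work items must not run
 * across suspend allocates its workqueue with WQ_FREEZABLE so that the
 * freezer paths above cover it.  The name is an assumption.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *example_fz_wq;

static int example_fz_init(void)
{
        example_fz_wq = alloc_workqueue("example_fz", WQ_FREEZABLE, 0);
        return example_fz_wq ? 0 : -ENOMEM;
}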
5212
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005213static int workqueue_apply_unbound_cpumask(void)
5214{
5215 LIST_HEAD(ctxs);
5216 int ret = 0;
5217 struct workqueue_struct *wq;
5218 struct apply_wqattrs_ctx *ctx, *n;
5219
5220 lockdep_assert_held(&wq_pool_mutex);
5221
5222 list_for_each_entry(wq, &workqueues, list) {
5223 if (!(wq->flags & WQ_UNBOUND))
5224 continue;
5225 /* creating multiple pwqs breaks ordering guarantee */
5226 if (wq->flags & __WQ_ORDERED)
5227 continue;
5228
5229 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
5230 if (!ctx) {
5231 ret = -ENOMEM;
5232 break;
5233 }
5234
5235 list_add_tail(&ctx->list, &ctxs);
5236 }
5237
5238 list_for_each_entry_safe(ctx, n, &ctxs, list) {
5239 if (!ret)
5240 apply_wqattrs_commit(ctx);
5241 apply_wqattrs_cleanup(ctx);
5242 }
5243
5244 return ret;
5245}
5246
5247/**
5248 * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
5249 * @cpumask: the cpumask to set
5250 *
5251 * The low-level workqueues cpumask is a global cpumask that limits
5252 * the affinity of all unbound workqueues. This function checks @cpumask,
 5253 * applies it to all unbound workqueues and updates all of their pwqs.
 5254 *
 5255 * Return: 0 - Success
5256 * -EINVAL - Invalid @cpumask
5257 * -ENOMEM - Failed to allocate memory for attrs or pwqs.
5258 */
5259int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
5260{
5261 int ret = -EINVAL;
5262 cpumask_var_t saved_cpumask;
5263
5264 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
5265 return -ENOMEM;
5266
Tal Shorerc98a9802017-11-03 17:27:50 +02005267 /*
5268 * Not excluding isolated cpus on purpose.
5269 * If the user wishes to include them, we allow that.
5270 */
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005271 cpumask_and(cpumask, cpumask, cpu_possible_mask);
5272 if (!cpumask_empty(cpumask)) {
Lai Jiangshana0111cf2015-05-19 18:03:47 +08005273 apply_wqattrs_lock();
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005274
5275 /* save the old wq_unbound_cpumask. */
5276 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
5277
5278 /* update wq_unbound_cpumask at first and apply it to wqs. */
5279 cpumask_copy(wq_unbound_cpumask, cpumask);
5280 ret = workqueue_apply_unbound_cpumask();
5281
5282 /* restore the wq_unbound_cpumask when failed. */
5283 if (ret < 0)
5284 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
5285
Lai Jiangshana0111cf2015-05-19 18:03:47 +08005286 apply_wqattrs_unlock();
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005287 }
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005288
5289 free_cpumask_var(saved_cpumask);
5290 return ret;
5291}
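
/*
 * A minimal sketch (illustrative): built-in code restricting all unbound
 * workqueues to a subset of CPUs, assuming it is allowed to call
 * workqueue_set_unbound_cpumask() directly; normally this is driven from
 * userspace through the sysfs cpumask file below.  The excluded CPUs are
 * assumptions.
 */
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static int __init example_restrict_unbound(void)
{
        cpumask_var_t mask;
        int ret;

        if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_copy(mask, cpu_possible_mask);
        cpumask_clear_cpu(2, mask);
        cpumask_clear_cpu(3, mask);

        ret = workqueue_set_unbound_cpumask(mask);
        free_cpumask_var(mask);
        return ret;
}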
5292
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005293#ifdef CONFIG_SYSFS
5294/*
5295 * Workqueues with the WQ_SYSFS flag set are visible to userland via
5296 * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
5297 * following attributes.
5298 *
5299 * per_cpu RO bool : whether the workqueue is per-cpu or unbound
5300 * max_active RW int : maximum number of in-flight work items
5301 *
5302 * Unbound workqueues have the following extra attributes.
5303 *
Wang Long9a19b462017-11-02 23:05:12 -04005304 * pool_ids RO int : the associated pool IDs for each node
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005305 * nice RW int : nice value of the workers
5306 * cpumask RW mask : bitmask of allowed CPUs for the workers
Wang Long9a19b462017-11-02 23:05:12 -04005307 * numa RW bool : whether to enable NUMA affinity
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005308 */
5309struct wq_device {
5310 struct workqueue_struct *wq;
5311 struct device dev;
5312};
5313
5314static struct workqueue_struct *dev_to_wq(struct device *dev)
5315{
5316 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5317
5318 return wq_dev->wq;
5319}
5320
5321static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5322 char *buf)
5323{
5324 struct workqueue_struct *wq = dev_to_wq(dev);
5325
5326 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5327}
5328static DEVICE_ATTR_RO(per_cpu);
5329
5330static ssize_t max_active_show(struct device *dev,
5331 struct device_attribute *attr, char *buf)
5332{
5333 struct workqueue_struct *wq = dev_to_wq(dev);
5334
5335 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5336}
5337
5338static ssize_t max_active_store(struct device *dev,
5339 struct device_attribute *attr, const char *buf,
5340 size_t count)
5341{
5342 struct workqueue_struct *wq = dev_to_wq(dev);
5343 int val;
5344
5345 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5346 return -EINVAL;
5347
5348 workqueue_set_max_active(wq, val);
5349 return count;
5350}
5351static DEVICE_ATTR_RW(max_active);
5352
5353static struct attribute *wq_sysfs_attrs[] = {
5354 &dev_attr_per_cpu.attr,
5355 &dev_attr_max_active.attr,
5356 NULL,
5357};
5358ATTRIBUTE_GROUPS(wq_sysfs);
5359
5360static ssize_t wq_pool_ids_show(struct device *dev,
5361 struct device_attribute *attr, char *buf)
5362{
5363 struct workqueue_struct *wq = dev_to_wq(dev);
5364 const char *delim = "";
5365 int node, written = 0;
5366
Thomas Gleixner24acfb72019-03-13 17:55:47 +01005367 get_online_cpus();
5368 rcu_read_lock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005369 for_each_node(node) {
5370 written += scnprintf(buf + written, PAGE_SIZE - written,
5371 "%s%d:%d", delim, node,
5372 unbound_pwq_by_node(wq, node)->pool->id);
5373 delim = " ";
5374 }
5375 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
Thomas Gleixner24acfb72019-03-13 17:55:47 +01005376 rcu_read_unlock();
5377 put_online_cpus();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005378
5379 return written;
5380}
5381
5382static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5383 char *buf)
5384{
5385 struct workqueue_struct *wq = dev_to_wq(dev);
5386 int written;
5387
5388 mutex_lock(&wq->mutex);
5389 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5390 mutex_unlock(&wq->mutex);
5391
5392 return written;
5393}
5394
5395/* prepare workqueue_attrs for sysfs store operations */
5396static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5397{
5398 struct workqueue_attrs *attrs;
5399
Lai Jiangshan899a94f2015-05-20 14:41:18 +08005400 lockdep_assert_held(&wq_pool_mutex);
5401
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005402 attrs = alloc_workqueue_attrs(GFP_KERNEL);
5403 if (!attrs)
5404 return NULL;
5405
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005406 copy_workqueue_attrs(attrs, wq->unbound_attrs);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005407 return attrs;
5408}
5409
5410static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5411 const char *buf, size_t count)
5412{
5413 struct workqueue_struct *wq = dev_to_wq(dev);
5414 struct workqueue_attrs *attrs;
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005415 int ret = -ENOMEM;
5416
5417 apply_wqattrs_lock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005418
5419 attrs = wq_sysfs_prep_attrs(wq);
5420 if (!attrs)
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005421 goto out_unlock;
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005422
5423 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5424 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005425 ret = apply_workqueue_attrs_locked(wq, attrs);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005426 else
5427 ret = -EINVAL;
5428
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005429out_unlock:
5430 apply_wqattrs_unlock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005431 free_workqueue_attrs(attrs);
5432 return ret ?: count;
5433}
5434
5435static ssize_t wq_cpumask_show(struct device *dev,
5436 struct device_attribute *attr, char *buf)
5437{
5438 struct workqueue_struct *wq = dev_to_wq(dev);
5439 int written;
5440
5441 mutex_lock(&wq->mutex);
5442 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5443 cpumask_pr_args(wq->unbound_attrs->cpumask));
5444 mutex_unlock(&wq->mutex);
5445 return written;
5446}
5447
5448static ssize_t wq_cpumask_store(struct device *dev,
5449 struct device_attribute *attr,
5450 const char *buf, size_t count)
5451{
5452 struct workqueue_struct *wq = dev_to_wq(dev);
5453 struct workqueue_attrs *attrs;
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005454 int ret = -ENOMEM;
5455
5456 apply_wqattrs_lock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005457
5458 attrs = wq_sysfs_prep_attrs(wq);
5459 if (!attrs)
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005460 goto out_unlock;
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005461
5462 ret = cpumask_parse(buf, attrs->cpumask);
5463 if (!ret)
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005464 ret = apply_workqueue_attrs_locked(wq, attrs);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005465
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005466out_unlock:
5467 apply_wqattrs_unlock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005468 free_workqueue_attrs(attrs);
5469 return ret ?: count;
5470}
5471
5472static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5473 char *buf)
5474{
5475 struct workqueue_struct *wq = dev_to_wq(dev);
5476 int written;
5477
5478 mutex_lock(&wq->mutex);
5479 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5480 !wq->unbound_attrs->no_numa);
5481 mutex_unlock(&wq->mutex);
5482
5483 return written;
5484}
5485
5486static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5487 const char *buf, size_t count)
5488{
5489 struct workqueue_struct *wq = dev_to_wq(dev);
5490 struct workqueue_attrs *attrs;
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005491 int v, ret = -ENOMEM;
5492
5493 apply_wqattrs_lock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005494
5495 attrs = wq_sysfs_prep_attrs(wq);
5496 if (!attrs)
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005497 goto out_unlock;
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005498
5499 ret = -EINVAL;
5500 if (sscanf(buf, "%d", &v) == 1) {
5501 attrs->no_numa = !v;
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005502 ret = apply_workqueue_attrs_locked(wq, attrs);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005503 }
5504
Lai Jiangshand4d3e252015-05-19 18:03:48 +08005505out_unlock:
5506 apply_wqattrs_unlock();
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005507 free_workqueue_attrs(attrs);
5508 return ret ?: count;
5509}
5510
5511static struct device_attribute wq_sysfs_unbound_attrs[] = {
5512 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5513 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5514 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5515 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5516 __ATTR_NULL,
5517};
5518
5519static struct bus_type wq_subsys = {
5520 .name = "workqueue",
5521 .dev_groups = wq_sysfs_groups,
5522};
5523
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005524static ssize_t wq_unbound_cpumask_show(struct device *dev,
5525 struct device_attribute *attr, char *buf)
5526{
5527 int written;
5528
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005529 mutex_lock(&wq_pool_mutex);
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005530 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5531 cpumask_pr_args(wq_unbound_cpumask));
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005532 mutex_unlock(&wq_pool_mutex);
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005533
5534 return written;
5535}
5536
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005537static ssize_t wq_unbound_cpumask_store(struct device *dev,
5538 struct device_attribute *attr, const char *buf, size_t count)
5539{
5540 cpumask_var_t cpumask;
5541 int ret;
5542
5543 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5544 return -ENOMEM;
5545
5546 ret = cpumask_parse(buf, cpumask);
5547 if (!ret)
5548 ret = workqueue_set_unbound_cpumask(cpumask);
5549
5550 free_cpumask_var(cpumask);
5551 return ret ? ret : count;
5552}
5553
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005554static struct device_attribute wq_sysfs_cpumask_attr =
Lai Jiangshan042f7df12015-04-30 17:16:12 +08005555 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5556 wq_unbound_cpumask_store);
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005557
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005558static int __init wq_sysfs_init(void)
5559{
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005560 int err;
5561
5562 err = subsys_virtual_register(&wq_subsys, NULL);
5563 if (err)
5564 return err;
5565
5566 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005567}
5568core_initcall(wq_sysfs_init);
5569
5570static void wq_device_release(struct device *dev)
5571{
5572 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5573
5574 kfree(wq_dev);
5575}
5576
5577/**
5578 * workqueue_sysfs_register - make a workqueue visible in sysfs
5579 * @wq: the workqueue to register
5580 *
5581 * Expose @wq in sysfs under /sys/bus/workqueue/devices.
5582 * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
5583 * which is the preferred method.
5584 *
5585 * A workqueue user should use this function directly iff it wants to apply
5586 * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
5587 * apply_workqueue_attrs() may race against userland updating the
5588 * attributes.
5589 *
5590 * Return: 0 on success, -errno on failure.
5591 */
5592int workqueue_sysfs_register(struct workqueue_struct *wq)
5593{
5594 struct wq_device *wq_dev;
5595 int ret;
5596
5597 /*
Shailendra Verma402dd892015-05-23 10:38:14 +05305598 * Adjusting max_active or creating new pwqs by applying
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005599 * attributes breaks the ordering guarantee. Disallow exposing ordered
5600 * workqueues.
5601 */
Tejun Heo0a94efb2017-07-23 08:36:15 -04005602 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005603 return -EINVAL;
5604
5605 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5606 if (!wq_dev)
5607 return -ENOMEM;
5608
5609 wq_dev->wq = wq;
5610 wq_dev->dev.bus = &wq_subsys;
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005611 wq_dev->dev.release = wq_device_release;
Lars-Peter Clausen23217b42016-02-17 21:04:41 +01005612 dev_set_name(&wq_dev->dev, "%s", wq->name);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005613
5614 /*
5615 * unbound_attrs are created separately. Suppress uevent until
5616 * everything is ready.
5617 */
5618 dev_set_uevent_suppress(&wq_dev->dev, true);
5619
5620 ret = device_register(&wq_dev->dev);
5621 if (ret) {
Arvind Yadav537f4142018-03-06 15:35:43 +05305622 put_device(&wq_dev->dev);
Frederic Weisbecker6ba94422015-04-02 19:14:39 +08005623 wq->wq_dev = NULL;
5624 return ret;
5625 }
5626
5627 if (wq->flags & WQ_UNBOUND) {
5628 struct device_attribute *attr;
5629
5630 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5631 ret = device_create_file(&wq_dev->dev, attr);
5632 if (ret) {
5633 device_unregister(&wq_dev->dev);
5634 wq->wq_dev = NULL;
5635 return ret;
5636 }
5637 }
5638 }
5639
5640 dev_set_uevent_suppress(&wq_dev->dev, false);
5641 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5642 return 0;
5643}
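
/*
 * A minimal sketch (illustrative): the usual way to get here is to pass
 * WQ_SYSFS to alloc_workqueue(), which registers the device for the
 * caller; calling workqueue_sysfs_register() directly is only needed when
 * attrs must be applied before the workqueue becomes visible.  Names are
 * assumptions.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *example_vis_wq;

static int example_create_visible_wq(void)
{
        /* appears as /sys/bus/workqueue/devices/example_vis */
        example_vis_wq = alloc_workqueue("example_vis",
                                         WQ_UNBOUND | WQ_SYSFS, 0);
        return example_vis_wq ? 0 : -ENOMEM;
}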
5644
5645/**
5646 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
5647 * @wq: the workqueue to unregister
5648 *
5649 * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
5650 */
5651static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5652{
5653 struct wq_device *wq_dev = wq->wq_dev;
5654
5655 if (!wq->wq_dev)
5656 return;
5657
5658 wq->wq_dev = NULL;
5659 device_unregister(&wq_dev->dev);
5660}
5661#else /* CONFIG_SYSFS */
5662static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5663#endif /* CONFIG_SYSFS */
5664
Tejun Heo82607adc2015-12-08 11:28:04 -05005665/*
5666 * Workqueue watchdog.
5667 *
5668 * Stalls may be caused by various bugs - a missing WQ_MEM_RECLAIM, an illegal
5669 * flush dependency, a concurrency managed work item which stays RUNNING
5670 * indefinitely. Workqueue stalls can be very difficult to debug as the
5671 * usual warning mechanisms don't trigger and internal workqueue state is
5672 * largely opaque.
5673 *
5674 * Workqueue watchdog monitors all worker pools periodically and dumps
5675 * state if some pools fail to make forward progress for a while, where
5676 * forward progress is defined as the first item on ->worklist changing.
5677 *
5678 * This mechanism is controlled through the kernel parameter
5679 * "workqueue.watchdog_thresh" which can be updated at runtime through the
5680 * corresponding sysfs parameter file.
5681 */
5682#ifdef CONFIG_WQ_WATCHDOG
5683
Tejun Heo82607adc2015-12-08 11:28:04 -05005684static unsigned long wq_watchdog_thresh = 30;
Kees Cook5cd79d62017-10-04 16:27:00 -07005685static struct timer_list wq_watchdog_timer;
Tejun Heo82607adc2015-12-08 11:28:04 -05005686
5687static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5688static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5689
5690static void wq_watchdog_reset_touched(void)
5691{
5692 int cpu;
5693
5694 wq_watchdog_touched = jiffies;
5695 for_each_possible_cpu(cpu)
5696 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5697}
5698
Kees Cook5cd79d62017-10-04 16:27:00 -07005699static void wq_watchdog_timer_fn(struct timer_list *unused)
Tejun Heo82607adc2015-12-08 11:28:04 -05005700{
5701 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5702 bool lockup_detected = false;
5703 struct worker_pool *pool;
5704 int pi;
5705
5706 if (!thresh)
5707 return;
5708
5709 rcu_read_lock();
5710
5711 for_each_pool(pool, pi) {
5712 unsigned long pool_ts, touched, ts;
5713
5714 if (list_empty(&pool->worklist))
5715 continue;
5716
5717 /* get the latest of pool and touched timestamps */
5718 pool_ts = READ_ONCE(pool->watchdog_ts);
5719 touched = READ_ONCE(wq_watchdog_touched);
5720
5721 if (time_after(pool_ts, touched))
5722 ts = pool_ts;
5723 else
5724 ts = touched;
5725
5726 if (pool->cpu >= 0) {
5727 unsigned long cpu_touched =
5728 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
5729 pool->cpu));
5730 if (time_after(cpu_touched, ts))
5731 ts = cpu_touched;
5732 }
5733
5734 /* did we stall? */
5735 if (time_after(jiffies, ts + thresh)) {
5736 lockup_detected = true;
5737 pr_emerg("BUG: workqueue lockup - pool");
5738 pr_cont_pool_info(pool);
5739 pr_cont(" stuck for %us!\n",
5740 jiffies_to_msecs(jiffies - pool_ts) / 1000);
5741 }
5742 }
5743
5744 rcu_read_unlock();
5745
5746 if (lockup_detected)
5747 show_workqueue_state();
5748
5749 wq_watchdog_reset_touched();
5750 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5751}
5752
Vincent Whitchurchcb9d7fd2018-08-21 17:25:07 +02005753notrace void wq_watchdog_touch(int cpu)
Tejun Heo82607adc2015-12-08 11:28:04 -05005754{
5755 if (cpu >= 0)
5756 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5757 else
5758 wq_watchdog_touched = jiffies;
5759}
5760
5761static void wq_watchdog_set_thresh(unsigned long thresh)
5762{
5763 wq_watchdog_thresh = 0;
5764 del_timer_sync(&wq_watchdog_timer);
5765
5766 if (thresh) {
5767 wq_watchdog_thresh = thresh;
5768 wq_watchdog_reset_touched();
5769 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5770 }
5771}
5772
5773static int wq_watchdog_param_set_thresh(const char *val,
5774 const struct kernel_param *kp)
5775{
5776 unsigned long thresh;
5777 int ret;
5778
5779 ret = kstrtoul(val, 0, &thresh);
5780 if (ret)
5781 return ret;
5782
5783 if (system_wq)
5784 wq_watchdog_set_thresh(thresh);
5785 else
5786 wq_watchdog_thresh = thresh;
5787
5788 return 0;
5789}
5790
5791static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5792 .set = wq_watchdog_param_set_thresh,
5793 .get = param_get_ulong,
5794};
5795
5796module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5797 0644);
5798
5799static void wq_watchdog_init(void)
5800{
Kees Cook5cd79d62017-10-04 16:27:00 -07005801 timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
Tejun Heo82607adc2015-12-08 11:28:04 -05005802 wq_watchdog_set_thresh(wq_watchdog_thresh);
5803}
5804
5805#else /* CONFIG_WQ_WATCHDOG */
5806
5807static inline void wq_watchdog_init(void) { }
5808
5809#endif /* CONFIG_WQ_WATCHDOG */
5810
Tejun Heobce90382013-04-01 11:23:32 -07005811static void __init wq_numa_init(void)
5812{
5813 cpumask_var_t *tbl;
5814 int node, cpu;
5815
Tejun Heobce90382013-04-01 11:23:32 -07005816 if (num_possible_nodes() <= 1)
5817 return;
5818
Tejun Heod55262c2013-04-01 11:23:38 -07005819 if (wq_disable_numa) {
5820 pr_info("workqueue: NUMA affinity support disabled\n");
5821 return;
5822 }
5823
Tejun Heo4c16bd32013-04-01 11:23:36 -07005824 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5825 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5826
Tejun Heobce90382013-04-01 11:23:32 -07005827 /*
5828 * We want masks of possible CPUs of each node which isn't readily
5829 * available. Build one from cpu_to_node() which should have been
5830 * fully initialized by now.
5831 */
Kees Cook6396bb22018-06-12 14:03:40 -07005832 tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
Tejun Heobce90382013-04-01 11:23:32 -07005833 BUG_ON(!tbl);
5834
5835 for_each_node(node)
Yasuaki Ishimatsu5a6024f2014-07-07 09:56:48 -04005836 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
Tejun Heo1be0c252013-05-15 14:24:24 -07005837 node_online(node) ? node : NUMA_NO_NODE));
Tejun Heobce90382013-04-01 11:23:32 -07005838
5839 for_each_possible_cpu(cpu) {
5840 node = cpu_to_node(cpu);
5841 if (WARN_ON(node == NUMA_NO_NODE)) {
5842 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5843 /* happens iff arch is bonkers, let's just proceed */
5844 return;
5845 }
5846 cpumask_set_cpu(cpu, tbl[node]);
5847 }
5848
5849 wq_numa_possible_cpumask = tbl;
5850 wq_numa_enabled = true;
5851}
5852
Tejun Heo3347fa02016-09-16 15:49:32 -04005853/**
5854 * workqueue_init_early - early init for workqueue subsystem
5855 *
5856 * This is the first half of two-staged workqueue subsystem initialization
5857 * and invoked as soon as the bare basics - memory allocation, cpumasks and
5858 * idr are up. It sets up all the data structures and system workqueues
5859 * and allows early boot code to create workqueues and queue/cancel work
5860 * items. Actual work item execution starts only after kthreads can be
5861 * created and scheduled right before early initcalls.
5862 */
5863int __init workqueue_init_early(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005864{
Tejun Heo7a4e3442013-03-12 11:30:00 -07005865 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
Frederic Weisbecker1bda3f82018-02-21 05:17:26 +01005866 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
Tejun Heo7a4e3442013-03-12 11:30:00 -07005867 int i, cpu;
Tejun Heoc34056a2010-06-29 10:07:11 +02005868
Tejun Heoe904e6c2013-03-12 11:29:57 -07005869 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5870
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005871 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
Frederic Weisbecker1bda3f82018-02-21 05:17:26 +01005872 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
Frederic Weisbeckerb05a7922015-04-27 17:58:39 +08005873
Tejun Heoe904e6c2013-03-12 11:29:57 -07005874 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5875
Tejun Heo706026c2013-01-24 11:01:34 -08005876 /* initialize CPU pools */
Tejun Heo29c91e92013-03-12 11:30:03 -07005877 for_each_possible_cpu(cpu) {
Tejun Heo4ce62e92012-07-13 22:16:44 -07005878 struct worker_pool *pool;
Tejun Heo8b03ae32010-06-29 10:07:12 +02005879
Tejun Heo7a4e3442013-03-12 11:30:00 -07005880 i = 0;
Tejun Heof02ae732013-03-12 11:30:03 -07005881 for_each_cpu_worker_pool(pool, cpu) {
Tejun Heo7a4e3442013-03-12 11:30:00 -07005882 BUG_ON(init_worker_pool(pool));
Tejun Heoec22ca52013-01-24 11:01:33 -08005883 pool->cpu = cpu;
Tejun Heo29c91e92013-03-12 11:30:03 -07005884 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
Tejun Heo7a4e3442013-03-12 11:30:00 -07005885 pool->attrs->nice = std_nice[i++];
Tejun Heof3f90ad2013-04-01 11:23:34 -07005886 pool->node = cpu_to_node(cpu);
Tejun Heo7a4e3442013-03-12 11:30:00 -07005887
Tejun Heo9daf9e62013-01-24 11:01:33 -08005888 /* alloc pool ID */
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005889 mutex_lock(&wq_pool_mutex);
Tejun Heo9daf9e62013-01-24 11:01:33 -08005890 BUG_ON(worker_pool_assign_id(pool));
Lai Jiangshan68e13a62013-03-25 16:57:17 -07005891 mutex_unlock(&wq_pool_mutex);
Tejun Heo4ce62e92012-07-13 22:16:44 -07005892 }
Tejun Heo8b03ae32010-06-29 10:07:12 +02005893 }
5894
Tejun Heo8a2b7532013-09-05 12:30:04 -04005895 /* create default unbound and ordered wq attrs */
Tejun Heo29c91e92013-03-12 11:30:03 -07005896 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5897 struct workqueue_attrs *attrs;
5898
5899 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
Tejun Heo29c91e92013-03-12 11:30:03 -07005900 attrs->nice = std_nice[i];
Tejun Heo29c91e92013-03-12 11:30:03 -07005901 unbound_std_wq_attrs[i] = attrs;
Tejun Heo8a2b7532013-09-05 12:30:04 -04005902
5903 /*
5904 * An ordered wq should have only one pwq as ordering is
5905 * guaranteed by max_active which is enforced by pwqs.
5906 * Turn off NUMA so that dfl_pwq is used for all nodes.
5907 */
5908 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5909 attrs->nice = std_nice[i];
5910 attrs->no_numa = true;
5911 ordered_wq_attrs[i] = attrs;
Tejun Heo29c91e92013-03-12 11:30:03 -07005912 }
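
	/*
	 * Editor's note (example, not in the original source): the attrs
	 * created above back workqueues made with alloc_ordered_workqueue(),
	 * which roughly expands to
	 *
	 *	alloc_workqueue(name, WQ_UNBOUND | __WQ_ORDERED | flags, 1);
	 *
	 * i.e. an unbound workqueue limited to one in-flight work item.
	 */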
5913
Tejun Heod320c032010-06-29 10:07:14 +02005914 system_wq = alloc_workqueue("events", 0, 0);
Joonsoo Kim1aabe902012-08-15 23:25:39 +09005915 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
Tejun Heod320c032010-06-29 10:07:14 +02005916 system_long_wq = alloc_workqueue("events_long", 0, 0);
Tejun Heof3421792010-07-02 10:03:51 +02005917 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5918 WQ_UNBOUND_MAX_ACTIVE);
Tejun Heo24d51ad2011-02-21 09:52:50 +01005919 system_freezable_wq = alloc_workqueue("events_freezable",
5920 WQ_FREEZABLE, 0);
Viresh Kumar06681062013-04-24 17:12:54 +05305921 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
5922 WQ_POWER_EFFICIENT, 0);
5923 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
5924 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
5925 0);
Joonsoo Kim1aabe902012-08-15 23:25:39 +09005926 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
Viresh Kumar06681062013-04-24 17:12:54 +05305927 !system_unbound_wq || !system_freezable_wq ||
5928 !system_power_efficient_wq ||
5929 !system_freezable_power_efficient_wq);
Tejun Heo82607adc2015-12-08 11:28:04 -05005930
Tejun Heo3347fa02016-09-16 15:49:32 -04005931 return 0;
5932}
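
/*
 * Usage sketch (editor's addition, not part of the kernel source): once
 * workqueue_init_early() has run, boot code may already create workqueues
 * and queue work items; nothing executes until workqueue_init() spawns the
 * first kworkers.  A hypothetical early caller could look like this:
 */
#if 0	/* example only, never compiled */
static void example_work_fn(struct work_struct *work)
{
	pr_info("workqueue example: runs only once kworkers exist\n");
}

static DECLARE_WORK(example_work, example_work_fn);

static void __init example_early_boot_user(void)
{
	/* legal any time after workqueue_init_early() */
	queue_work(system_wq, &example_work);
}
#endif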
5933
5934/**
5935 * workqueue_init - bring workqueue subsystem fully online
5936 *
5937 * This is the latter half of two-stage workqueue subsystem initialization
5938 * and is invoked as soon as kthreads can be created and scheduled.
5939 * Workqueues have been created and work items queued on them, but there
5940 * are no kworkers executing the work items yet. Populate the worker pools
5941 * with the initial workers and enable future kworker creations.
5942 */
5943int __init workqueue_init(void)
5944{
Tejun Heo2186d9f2016-10-19 12:01:27 -04005945 struct workqueue_struct *wq;
Tejun Heo3347fa02016-09-16 15:49:32 -04005946 struct worker_pool *pool;
5947 int cpu, bkt;
5948
Tejun Heo2186d9f2016-10-19 12:01:27 -04005949 /*
5950 * It'd be simpler to initialize NUMA in workqueue_init_early() but
5951 * CPU to node mapping may not be available that early on some
5952	 * archs such as power and arm64.  As the per-cpu pools created
5953	 * previously could be missing the node hint, and unbound pools their
5954	 * NUMA affinity, fix them up.
Tejun Heo40c17f72018-01-08 05:38:37 -08005955 *
5956 * Also, while iterating workqueues, create rescuers if requested.
Tejun Heo2186d9f2016-10-19 12:01:27 -04005957 */
5958 wq_numa_init();
5959
5960 mutex_lock(&wq_pool_mutex);
5961
5962 for_each_possible_cpu(cpu) {
5963 for_each_cpu_worker_pool(pool, cpu) {
5964 pool->node = cpu_to_node(cpu);
5965 }
5966 }
5967
Tejun Heo40c17f72018-01-08 05:38:37 -08005968 list_for_each_entry(wq, &workqueues, list) {
Tejun Heo2186d9f2016-10-19 12:01:27 -04005969 wq_update_unbound_numa(wq, smp_processor_id(), true);
Tejun Heo40c17f72018-01-08 05:38:37 -08005970 WARN(init_rescuer(wq),
5971		     "workqueue: failed to create early rescuer for %s\n",
5972 wq->name);
5973 }
Tejun Heo2186d9f2016-10-19 12:01:27 -04005974
5975 mutex_unlock(&wq_pool_mutex);
5976
Tejun Heo3347fa02016-09-16 15:49:32 -04005977 /* create the initial workers */
5978 for_each_online_cpu(cpu) {
5979 for_each_cpu_worker_pool(pool, cpu) {
5980 pool->flags &= ~POOL_DISASSOCIATED;
5981 BUG_ON(!create_worker(pool));
5982 }
5983 }
5984
5985 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
5986 BUG_ON(!create_worker(pool));
5987
5988 wq_online = true;
Tejun Heo82607adc2015-12-08 11:28:04 -05005989 wq_watchdog_init();
5990
Suresh Siddha6ee05782010-07-30 14:57:37 -07005991 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005992}
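
/*
 * Boot-ordering sketch (editor's addition, not part of this file; the call
 * sites live in init/main.c and their exact positions vary between kernel
 * versions):
 *
 *	start_kernel()
 *		...
 *		workqueue_init_early()	<- data structures, system workqueues;
 *					   queueing/cancelling becomes legal
 *		...
 *	kernel_init_freeable()
 *		...
 *		workqueue_init()	<- NUMA fixup, early rescuers, first
 *					   kworkers; work execution begins
 *		...
 */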