Diffstat (limited to 'kernel')
-rw-r--r--  kernel/async.c              |  14
-rw-r--r--  kernel/kmod.c               |   9
-rw-r--r--  kernel/sched/core.c         |   2
-rw-r--r--  kernel/workqueue.c          | 178
-rw-r--r--  kernel/workqueue_internal.h |  66
-rw-r--r--  kernel/workqueue_sched.h    |   9
6 files changed, 158 insertions(+), 120 deletions(-)
diff --git a/kernel/async.c b/kernel/async.c
index 6f34904a0b53..6c68fc3fae7b 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
+#include "workqueue_internal.h"
+
 static async_cookie_t next_cookie = 1;
 
 #define MAX_WORK	32768
@@ -353,3 +355,15 @@ void async_synchronize_cookie(async_cookie_t cookie)
 	async_synchronize_cookie_domain(cookie, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+/**
+ * current_is_async - is %current an async worker task?
+ *
+ * Returns %true if %current is an async worker task.
+ */
+bool current_is_async(void)
+{
+	struct worker *worker = current_wq_worker();
+
+	return worker && worker->current_func == async_run_entry_fn;
+}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0023a87e8de6..56dd34976d7b 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -38,6 +38,7 @@
 #include <linux/suspend.h>
 #include <linux/rwsem.h>
 #include <linux/ptrace.h>
+#include <linux/async.h>
 #include <asm/uaccess.h>
 
 #include <trace/events/module.h>
@@ -130,6 +131,14 @@ int __request_module(bool wait, const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
+	/*
+	 * We don't allow synchronous module loading from async.  Module
+	 * init may invoke async_synchronize_full() which will end up
+	 * waiting for this task which already is waiting for the module
+	 * loading to complete, leading to a deadlock.
+	 */
+	WARN_ON_ONCE(wait && current_is_async());
+
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 26058d0bebba..bfe8ae22f710 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,7 +83,7 @@
 #endif
 
 #include "sched.h"
-#include "../workqueue_sched.h"
+#include "../workqueue_internal.h"
 #include "../smpboot.h"
 
 #define CREATE_TRACE_POINTS
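The kmod.c hunk above targets a deadlock that is easy to trigger from driver code. A minimal sketch of the pattern the new warning catches, using the 3.8-era async API; the module and all names in it are hypothetical:

    #include <linux/async.h>
    #include <linux/kmod.h>
    #include <linux/module.h>

    /* hypothetical async callback; runs on an async (workqueue) worker */
    static void my_async_probe(void *data, async_cookie_t cookie)
    {
            /*
             * request_module() waits for the loaded module's init to
             * finish.  If that init calls async_synchronize_full(), it
             * waits for all async work, including this very callback, so
             * neither side can make progress.  The new
             * WARN_ON_ONCE(wait && current_is_async()) fires here.
             */
            request_module("some-helper");
    }

    static int __init my_init(void)
    {
            async_schedule(my_async_probe, NULL);
            return 0;
    }
    module_init(my_init);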
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fbc6576a83c3..2ffa240052fa 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,8 +41,9 @@
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
 #include <linux/idr.h>
+#include <linux/hashtable.h>
 
-#include "workqueue_sched.h"
+#include "workqueue_internal.h"
 
 enum {
 	/*
@@ -82,8 +83,6 @@ enum {
 	NR_WORKER_POOLS		= 2,		/* # worker pools per gcwq */
 
 	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */
-	BUSY_WORKER_HASH_SIZE	= 1 << BUSY_WORKER_HASH_ORDER,
-	BUSY_WORKER_HASH_MASK	= BUSY_WORKER_HASH_SIZE - 1,
 
 	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
 	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */
@@ -123,33 +122,7 @@ enum {
  * W: workqueue_lock protected.
  */
 
-struct global_cwq;
-struct worker_pool;
-
-/*
- * The poor guys doing the actual heavy lifting.  All on-duty workers
- * are either serving the manager role, on idle list or on busy hash.
- */
-struct worker {
-	/* on idle list while idle, on busy hash table while busy */
-	union {
-		struct list_head	entry;	/* L: while idle */
-		struct hlist_node	hentry;	/* L: while busy */
-	};
-
-	struct work_struct	*current_work;	/* L: work being processed */
-	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
-	struct list_head	scheduled;	/* L: scheduled works */
-	struct task_struct	*task;		/* I: worker task */
-	struct worker_pool	*pool;		/* I: the associated pool */
-	/* 64 bytes boundary on 64bit, 32 on 32bit */
-	unsigned long		last_active;	/* L: last active timestamp */
-	unsigned int		flags;		/* X: flags */
-	int			id;		/* I: worker id */
-
-	/* for rebinding worker to CPU */
-	struct work_struct	rebind_work;	/* L: for busy worker */
-};
+/* struct worker is defined in workqueue_internal.h */
 
 struct worker_pool {
 	struct global_cwq	*gcwq;		/* I: the owning gcwq */
@@ -180,7 +153,7 @@ struct global_cwq {
 	unsigned int		flags;		/* L: GCWQ_* flags */
 
 	/* workers are chained either in busy_hash or pool idle_list */
-	struct hlist_head	busy_hash[BUSY_WORKER_HASH_SIZE];
+	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
 						/* L: hash of busy workers */
 
 	struct worker_pool	pools[NR_WORKER_POOLS];
@@ -285,8 +258,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
 	     (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++)
 
 #define for_each_busy_worker(worker, i, pos, gcwq)			\
-	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)			\
-		hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
+	hash_for_each(gcwq->busy_hash, i, pos, worker, hentry)
 
 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
 				  unsigned int sw)
@@ -764,12 +736,20 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
 				       unsigned int cpu)
 {
 	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
-	struct worker_pool *pool = worker->pool;
-	atomic_t *nr_running = get_pool_nr_running(pool);
+	struct worker_pool *pool;
+	atomic_t *nr_running;
 
+	/*
+	 * Rescuers, which may not have all the fields set up like normal
+	 * workers, also reach here, let's not access anything before
+	 * checking NOT_RUNNING.
+	 */
 	if (worker->flags & WORKER_NOT_RUNNING)
 		return NULL;
 
+	pool = worker->pool;
+	nr_running = get_pool_nr_running(pool);
+
 	/* this can only happen on the local cpu */
 	BUG_ON(cpu != raw_smp_processor_id());
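The busy_hash conversion above replaces an open-coded bucket array with the generic <linux/hashtable.h> API. Below is a self-contained sketch of that API as it existed in this era, when the iterators still took an explicit struct hlist_node cursor (later kernels drop that argument); all names in it are illustrative only:

    #include <linux/hashtable.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>

    struct item {
            unsigned long key;
            struct hlist_node hnode;        /* chains the item into a bucket */
    };

    /* 2^6 == 64 buckets, mirroring BUSY_WORKER_HASH_ORDER */
    static DEFINE_HASHTABLE(example_table, 6);

    static void example(unsigned long key)
    {
            struct item *it, *cur;
            struct hlist_node *tmp;

            it = kmalloc(sizeof(*it), GFP_KERNEL);
            if (!it)
                    return;
            it->key = key;

            /* the key is hashed internally; no open-coded shift-and-fold */
            hash_add(example_table, &it->hnode, it->key);

            /* walks only the one bucket that can hold @key */
            hash_for_each_possible(example_table, cur, tmp, hnode, key)
                    if (cur->key == key)
                            pr_info("found %lx\n", cur->key);

            hash_del(&it->hnode);
            kfree(it);
    }

A table embedded in another object, like gcwq->busy_hash, uses DECLARE_HASHTABLE() plus a runtime hash_init() instead of DEFINE_HASHTABLE(); that is exactly the switch the init_workqueues() hunk further down makes.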
@@ -859,41 +839,31 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 }
 
 /**
- * busy_worker_head - return the busy hash head for a work
- * @gcwq: gcwq of interest
- * @work: work to be hashed
- *
- * Return hash head of @gcwq for @work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to the hash head.
- */
-static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
-					   struct work_struct *work)
-{
-	const int base_shift = ilog2(sizeof(struct work_struct));
-	unsigned long v = (unsigned long)work;
-
-	/* simple shift and fold hash, do we need something better? */
-	v >>= base_shift;
-	v += v >> BUSY_WORKER_HASH_ORDER;
-	v &= BUSY_WORKER_HASH_MASK;
-
-	return &gcwq->busy_hash[v];
-}
-
-/**
- * __find_worker_executing_work - find worker which is executing a work
+ * find_worker_executing_work - find worker which is executing a work
  * @gcwq: gcwq of interest
- * @bwh: hash head as returned by busy_worker_head()
  * @work: work to find worker for
  *
- * Find a worker which is executing @work on @gcwq.  @bwh should be
- * the hash head obtained by calling busy_worker_head() with the same
- * work.
+ * Find a worker which is executing @work on @gcwq by searching
+ * @gcwq->busy_hash which is keyed by the address of @work.  For a worker
+ * to match, its current execution should match the address of @work and
+ * its work function.  This is to avoid unwanted dependency between
+ * unrelated work executions through a work item being recycled while still
+ * being executed.
+ *
+ * This is a bit tricky.  A work item may be freed once its execution
+ * starts and nothing prevents the freed area from being recycled for
+ * another work item.  If the same work item address ends up being reused
+ * before the original execution finishes, workqueue will identify the
+ * recycled work item as currently executing and make it wait until the
+ * current execution finishes, introducing an unwanted dependency.
+ *
+ * This function checks the work item address and work function to avoid
+ * false positives.  Note that this isn't complete as one may construct a
+ * work function which can introduce dependency onto itself through a
+ * recycled work item.  Well, if somebody wants to shoot oneself in the
+ * foot that badly, there's only so much we can do, and if such deadlock
+ * actually occurs, it should be easy to locate the culprit work function.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
@@ -902,40 +872,19 @@ static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
 * Pointer to worker which is executing @work if found, NULL
 * otherwise.
 */
-static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
-						   struct hlist_head *bwh,
-						   struct work_struct *work)
+static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
+						 struct work_struct *work)
 {
 	struct worker *worker;
 	struct hlist_node *tmp;
 
-	hlist_for_each_entry(worker, tmp, bwh, hentry)
-		if (worker->current_work == work)
+	hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry,
+			       (unsigned long)work)
+		if (worker->current_work == work &&
+		    worker->current_func == work->func)
 			return worker;
-	return NULL;
-}
 
-/**
- * find_worker_executing_work - find worker which is executing a work
- * @gcwq: gcwq of interest
- * @work: work to find worker for
- *
- * Find a worker which is executing @work on @gcwq.  This function is
- * identical to __find_worker_executing_work() except that this
- * function calculates @bwh itself.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
- * otherwise.
- */
-static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
-						 struct work_struct *work)
-{
-	return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
-					    work);
+	return NULL;
 }
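To make the recycling hazard described above concrete, here is a sketch of how a work item's address can be legitimately reused while its first execution is still running (hypothetical driver code, not from this diff):

    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct request {
            struct work_struct work;
            /* ... payload ... */
    };

    static void req_fn(struct work_struct *work)
    {
            struct request *req = container_of(work, struct request, work);

            kfree(req);     /* the work item is gone... */
            /* ...but this function keeps executing for a while */
    }

    static void submit(void)
    {
            /* kmalloc() may hand back the address just kfree'd above */
            struct request *req = kmalloc(sizeof(*req), GFP_KERNEL);

            if (!req)
                    return;
            INIT_WORK(&req->work, req_fn);
            /*
             * Matching busy workers on the address alone would serialize
             * this new item behind any unrelated execution that happens
             * to have recycled the same address.  Also requiring a
             * matching work function confines the false positive to the
             * same-function case the comment above concedes it cannot
             * catch.
             */
            schedule_work(&req->work);
    }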
@@ -2166,9 +2115,7 @@ __acquires(&gcwq->lock)
 	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
 	struct worker_pool *pool = worker->pool;
 	struct global_cwq *gcwq = pool->gcwq;
-	struct hlist_head *bwh = busy_worker_head(gcwq, work);
 	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
-	work_func_t f = work->func;
 	int work_color;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
@@ -2198,7 +2145,7 @@ __acquires(&gcwq->lock)
 	 * already processing the work.  If so, defer the work to the
 	 * currently executing one.
 	 */
-	collision = __find_worker_executing_work(gcwq, bwh, work);
+	collision = find_worker_executing_work(gcwq, work);
 	if (unlikely(collision)) {
 		move_linked_works(work, &collision->scheduled, NULL);
 		return;
@@ -2206,8 +2153,9 @@ __acquires(&gcwq->lock)
 
 	/* claim and dequeue */
 	debug_work_deactivate(work);
-	hlist_add_head(&worker->hentry, bwh);
+	hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)work);
 	worker->current_work = work;
+	worker->current_func = work->func;
 	worker->current_cwq = cwq;
 	work_color = get_work_color(work);
 
@@ -2240,7 +2188,7 @@ __acquires(&gcwq->lock)
 	lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
-	f(work);
+	worker->current_func(work);
 	/*
 	 * While we must be careful to not use "work" after this, the trace
 	 * point will only record its address.
@@ -2252,7 +2200,8 @@ __acquires(&gcwq->lock)
 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
 		       "     last function: %pf\n",
-		       current->comm, preempt_count(), task_pid_nr(current), f);
+		       current->comm, preempt_count(), task_pid_nr(current),
+		       worker->current_func);
 		debug_show_held_locks(current);
 		dump_stack();
 	}
@@ -2264,8 +2213,9 @@ __acquires(&gcwq->lock)
 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
 
 	/* we're done with it, release */
-	hlist_del_init(&worker->hentry);
+	hash_del(&worker->hentry);
 	worker->current_work = NULL;
+	worker->current_func = NULL;
 	worker->current_cwq = NULL;
 	cwq_dec_nr_in_flight(cwq, work_color);
 }
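The process_one_work() changes all follow one discipline: anything needed after the callback returns must be cached in the worker first, because the callback may free the work item. A hedged sketch of just that slice of the function; run_one() is an illustrative stand-in, assuming the struct worker from workqueue_internal.h added later in this diff and a file sitting next to kernel/workqueue.c:

    #include <linux/kernel.h>
    #include <linux/workqueue.h>
    #include "workqueue_internal.h"

    static void run_one(struct worker *worker, struct work_struct *work)
    {
            worker->current_work = work;
            worker->current_func = work->func;      /* cache before calling */

            worker->current_func(work);             /* may free @work */

            /*
             * @work is now off limits; the cached pointer is what the
             * "leaked lock or atomic" report above prints via %pf.
             */
            pr_info("last function: %pf\n", worker->current_func);

            worker->current_work = NULL;
            worker->current_func = NULL;
    }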
@@ -2388,7 +2338,7 @@ sleep:
 
 /**
  * rescuer_thread - the rescuer thread function
- * @__wq: the associated workqueue
+ * @__rescuer: self
  *
  * Workqueue rescuer thread function.  There's one rescuer for each
  * workqueue which has WQ_RESCUER set.
@@ -2405,20 +2355,27 @@
 *
 * This should happen rarely.
 */
-static int rescuer_thread(void *__wq)
+static int rescuer_thread(void *__rescuer)
 {
-	struct workqueue_struct *wq = __wq;
-	struct worker *rescuer = wq->rescuer;
+	struct worker *rescuer = __rescuer;
+	struct workqueue_struct *wq = rescuer->rescue_wq;
 	struct list_head *scheduled = &rescuer->scheduled;
 	bool is_unbound = wq->flags & WQ_UNBOUND;
 	unsigned int cpu;
 
 	set_user_nice(current, RESCUER_NICE_LEVEL);
+
+	/*
+	 * Mark rescuer as worker too.  As WORKER_PREP is never cleared, it
+	 * doesn't participate in concurrency management.
+	 */
+	rescuer->task->flags |= PF_WQ_WORKER;
 repeat:
 	set_current_state(TASK_INTERRUPTIBLE);
 
 	if (kthread_should_stop()) {
 		__set_current_state(TASK_RUNNING);
+		rescuer->task->flags &= ~PF_WQ_WORKER;
 		return 0;
 	}
 
@@ -2462,6 +2419,8 @@ repeat:
 		spin_unlock_irq(&gcwq->lock);
 	}
 
+	/* rescuers should never participate in concurrency management */
+	WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
 	schedule();
 	goto repeat;
 }
@@ -3297,7 +3256,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 		if (!rescuer)
 			goto err;
 
-		rescuer->task = kthread_create(rescuer_thread, wq, "%s",
+		rescuer->rescue_wq = wq;
+		rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
 					       wq->name);
 		if (IS_ERR(rescuer->task))
 			goto err;
@@ -3831,7 +3791,6 @@ out_unlock:
 static int __init init_workqueues(void)
 {
 	unsigned int cpu;
-	int i;
 
 	/* make sure we have enough bits for OFFQ CPU number */
 	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
@@ -3849,8 +3808,7 @@ static int __init init_workqueues(void)
 		gcwq->cpu = cpu;
 		gcwq->flags |= GCWQ_DISASSOCIATED;
 
-		for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
-			INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
+		hash_init(gcwq->busy_hash);
 
 		for_each_worker_pool(pool, gcwq) {
 			pool->gcwq = gcwq;
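The rescuer_thread() change works because kthread_data() simply returns the pointer that was handed to kthread_create(); by passing the struct worker itself, with ->rescue_wq filled in beforehand, the scheduler hooks can treat a rescuer like any other worker. A generic sketch of that pattern, with hypothetical names throughout:

    #include <linux/kthread.h>
    #include <linux/sched.h>

    struct my_ctx {
            int id;
            /* ...everything the thread will need... */
    };

    static int my_thread_fn(void *data)
    {
            /* kthread_data(current) == the pointer given to kthread_create() */
            struct my_ctx *ctx = kthread_data(current);

            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();
            }
            return ctx->id;
    }

    static struct task_struct *start(struct my_ctx *ctx)
    {
            /* fill in the context *before* the thread can run */
            ctx->id = 1;
            return kthread_create(my_thread_fn, ctx, "my-thread/%d", ctx->id);
    }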
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
new file mode 100644
index 000000000000..cc35e7e62091
--- /dev/null
+++ b/kernel/workqueue_internal.h
@@ -0,0 +1,66 @@
+/*
+ * kernel/workqueue_internal.h
+ *
+ * Workqueue internal header file.  Only to be included by workqueue and
+ * core kernel subsystems.
+ */
+#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
+#define _KERNEL_WORKQUEUE_INTERNAL_H
+
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+struct global_cwq;
+struct worker_pool;
+
+/*
+ * The poor guys doing the actual heavy lifting.  All on-duty workers are
+ * either serving the manager role, on idle list or on busy hash.  For
+ * details on the locking annotation (L, I, X...), refer to workqueue.c.
+ *
+ * Only to be used in workqueue and async.
+ */
+struct worker {
+	/* on idle list while idle, on busy hash table while busy */
+	union {
+		struct list_head	entry;	/* L: while idle */
+		struct hlist_node	hentry;	/* L: while busy */
+	};
+
+	struct work_struct	*current_work;	/* L: work being processed */
+	work_func_t		current_func;	/* L: current_work's fn */
+	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
+	struct list_head	scheduled;	/* L: scheduled works */
+	struct task_struct	*task;		/* I: worker task */
+	struct worker_pool	*pool;		/* I: the associated pool */
+	/* 64 bytes boundary on 64bit, 32 on 32bit */
+	unsigned long		last_active;	/* L: last active timestamp */
+	unsigned int		flags;		/* X: flags */
+	int			id;		/* I: worker id */
+
+	/* for rebinding worker to CPU */
+	struct work_struct	rebind_work;	/* L: for busy worker */
+
+	/* used only by rescuers to point to the target workqueue */
+	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
+};
+
+/**
+ * current_wq_worker - return struct worker if %current is a workqueue worker
+ */
+static inline struct worker *current_wq_worker(void)
+{
+	if (current->flags & PF_WQ_WORKER)
+		return kthread_data(current);
+	return NULL;
+}
+
+/*
+ * Scheduler hooks for concurrency managed workqueue.  Only to be used
+ * from sched.c and workqueue.c.
+ */
+void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
+struct task_struct *wq_worker_sleeping(struct task_struct *task,
+				       unsigned int cpu);
+
+#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
deleted file mode 100644
index 2d10fc98dc79..000000000000
--- a/kernel/workqueue_sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * kernel/workqueue_sched.h
- *
- * Scheduler hooks for concurrency managed workqueue.  Only to be
- * included from sched.c and workqueue.c.
- */
-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
-struct task_struct *wq_worker_sleeping(struct task_struct *task,
-				       unsigned int cpu);
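For context on how the two surviving declarations are consumed: the scheduler calls them only for tasks flagged PF_WQ_WORKER, which is why the rescuer hunk above sets and clears that flag. A paraphrased sketch of the sched/core.c call site of this era, not part of the diff; try_to_wake_up_local() is internal to the scheduler:

    #include "workqueue_internal.h"

    /* roughly what __schedule() does when a worker task blocks */
    static void sketch_worker_sleeping(struct task_struct *prev, unsigned int cpu)
    {
            if (prev->flags & PF_WQ_WORKER) {
                    struct task_struct *to_wakeup;

                    /* the pool may need another worker to keep the CPU busy */
                    to_wakeup = wq_worker_sleeping(prev, cpu);
                    if (to_wakeup)
                            try_to_wake_up_local(to_wakeup);
            }
    }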