diff options
Diffstat (limited to 'kernel/padata.c')
-rw-r--r-- | kernel/padata.c | 189 |
1 files changed, 133 insertions, 56 deletions
diff --git a/kernel/padata.c b/kernel/padata.c index fd03513c732..fdd8ae609ce 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -29,7 +29,7 @@ #include <linux/rcupdate.h> #define MAX_SEQ_NR INT_MAX - NR_CPUS -#define MAX_OBJ_NUM 10000 * NR_CPUS +#define MAX_OBJ_NUM 1000 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { @@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work) local_bh_enable(); } -/* +/** * padata_do_parallel - padata parallelization function * * @pinst: padata instance @@ -152,6 +152,23 @@ out: } EXPORT_SYMBOL(padata_do_parallel); +/* + * padata_get_next - Get the next object that needs serialization. + * + * Return values are: + * + * A pointer to the control struct of the next object that needs + * serialization, if present in one of the percpu reorder queues. + * + * NULL, if all percpu reorder queues are empty. + * + * -EINPROGRESS, if the next object that needs serialization will + * be parallel processed by another cpu and is not yet present in + * the cpu's reorder queue. + * + * -ENODATA, if this cpu has to do the parallel processing for + * the next object. + */ static struct padata_priv *padata_get_next(struct parallel_data *pd) { int cpu, num_cpus, empty, calc_seq_nr; @@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) /* * Calculate the seq_nr of the object that should be - * next in this queue. + * next in this reorder queue. */ overrun = 0; calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) @@ -231,7 +248,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) goto out; } - if (next_nr % num_cpus == next_queue->cpu_index) { + queue = per_cpu_ptr(pd->queue, smp_processor_id()); + if (queue->cpu_index == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); goto out; } @@ -247,19 +265,40 @@ static void padata_reorder(struct parallel_data *pd) struct padata_queue *queue; struct padata_instance *pinst = pd->pinst; -try_again: + /* + * We need to ensure that only one cpu can work on dequeueing of + * the reorder queue the time. Calculating in which percpu reorder + * queue the next object will arrive takes some time. A spinlock + * would be highly contended. Also it is not clear in which order + * the objects arrive to the reorder queues. So a cpu could wait to + * get the lock just to notice that there is nothing to do at the + * moment. Therefore we use a trylock and let the holder of the lock + * care for all the objects enqueued during the holdtime of the lock. + */ if (!spin_trylock_bh(&pd->lock)) - goto out; + return; while (1) { padata = padata_get_next(pd); + /* + * All reorder queues are empty, or the next object that needs + * serialization is parallel processed by another cpu and is + * still on it's way to the cpu's reorder queue, nothing to + * do for now. + */ if (!padata || PTR_ERR(padata) == -EINPROGRESS) break; + /* + * This cpu has to do the parallel processing of the next + * object. It's waiting in the cpu's parallelization queue, + * so exit imediately. + */ if (PTR_ERR(padata) == -ENODATA) { + del_timer(&pd->timer); spin_unlock_bh(&pd->lock); - goto out; + return; } queue = per_cpu_ptr(pd->queue, padata->cb_cpu); @@ -273,13 +312,27 @@ try_again: spin_unlock_bh(&pd->lock); - if (atomic_read(&pd->reorder_objects)) - goto try_again; + /* + * The next object that needs serialization might have arrived to + * the reorder queues in the meantime, we will be called again + * from the timer function if noone else cares for it. + */ + if (atomic_read(&pd->reorder_objects) + && !(pinst->flags & PADATA_RESET)) + mod_timer(&pd->timer, jiffies + HZ); + else + del_timer(&pd->timer); -out: return; } +static void padata_reorder_timer(unsigned long arg) +{ + struct parallel_data *pd = (struct parallel_data *)arg; + + padata_reorder(pd); +} + static void padata_serial_worker(struct work_struct *work) { struct padata_queue *queue; @@ -308,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work) local_bh_enable(); } -/* +/** * padata_do_serial - padata serialization function * * @padata: object to be serialized. @@ -338,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata) } EXPORT_SYMBOL(padata_do_serial); +/* Allocate and initialize the internal cpumask dependend resources. */ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, const struct cpumask *cpumask) { @@ -358,17 +412,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) goto err_free_queue; - for_each_possible_cpu(cpu) { + cpumask_and(pd->cpumask, cpumask, cpu_active_mask); + + for_each_cpu(cpu, pd->cpumask) { queue = per_cpu_ptr(pd->queue, cpu); queue->pd = pd; - if (cpumask_test_cpu(cpu, cpumask) - && cpumask_test_cpu(cpu, cpu_active_mask)) { - queue->cpu_index = cpu_index; - cpu_index++; - } else - queue->cpu_index = -1; + queue->cpu_index = cpu_index; + cpu_index++; INIT_LIST_HEAD(&queue->reorder.list); INIT_LIST_HEAD(&queue->parallel.list); @@ -382,11 +434,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, atomic_set(&queue->num_obj, 0); } - cpumask_and(pd->cpumask, cpumask, cpu_active_mask); - num_cpus = cpumask_weight(pd->cpumask); pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; + setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); atomic_set(&pd->refcnt, 0); @@ -410,6 +461,31 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } +/* Flush all objects out of the padata queues. */ +static void padata_flush_queues(struct parallel_data *pd) +{ + int cpu; + struct padata_queue *queue; + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + flush_work(&queue->pwork); + } + + del_timer_sync(&pd->timer); + + if (atomic_read(&pd->reorder_objects)) + padata_reorder(pd); + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + flush_work(&queue->swork); + } + + BUG_ON(atomic_read(&pd->refcnt) != 0); +} + +/* Replace the internal control stucture with a new one. */ static void padata_replace(struct padata_instance *pinst, struct parallel_data *pd_new) { @@ -421,17 +497,13 @@ static void padata_replace(struct padata_instance *pinst, synchronize_rcu(); - while (atomic_read(&pd_old->refcnt) != 0) - yield(); - - flush_workqueue(pinst->wq); - + padata_flush_queues(pd_old); padata_free_pd(pd_old); pinst->flags &= ~PADATA_RESET; } -/* +/** * padata_set_cpumask - set the cpumask that padata should use * * @pinst: padata instance @@ -443,10 +515,10 @@ int padata_set_cpumask(struct padata_instance *pinst, struct parallel_data *pd; int err = 0; - might_sleep(); - mutex_lock(&pinst->lock); + get_online_cpus(); + pd = padata_alloc_pd(pinst, cpumask); if (!pd) { err = -ENOMEM; @@ -458,6 +530,8 @@ int padata_set_cpumask(struct padata_instance *pinst, padata_replace(pinst, pd); out: + put_online_cpus(); + mutex_unlock(&pinst->lock); return err; @@ -479,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) return 0; } -/* +/** * padata_add_cpu - add a cpu to the padata cpumask * * @pinst: padata instance @@ -489,12 +563,12 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu) { int err; - might_sleep(); - mutex_lock(&pinst->lock); + get_online_cpus(); cpumask_set_cpu(cpu, pinst->cpumask); err = __padata_add_cpu(pinst, cpu); + put_online_cpus(); mutex_unlock(&pinst->lock); @@ -517,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) return 0; } -/* +/** * padata_remove_cpu - remove a cpu from the padata cpumask * * @pinst: padata instance @@ -527,12 +601,12 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) { int err; - might_sleep(); - mutex_lock(&pinst->lock); + get_online_cpus(); cpumask_clear_cpu(cpu, pinst->cpumask); err = __padata_remove_cpu(pinst, cpu); + put_online_cpus(); mutex_unlock(&pinst->lock); @@ -540,38 +614,35 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu) } EXPORT_SYMBOL(padata_remove_cpu); -/* +/** * padata_start - start the parallel processing * * @pinst: padata instance to start */ void padata_start(struct padata_instance *pinst) { - might_sleep(); - mutex_lock(&pinst->lock); pinst->flags |= PADATA_INIT; mutex_unlock(&pinst->lock); } EXPORT_SYMBOL(padata_start); -/* +/** * padata_stop - stop the parallel processing * * @pinst: padata instance to stop */ void padata_stop(struct padata_instance *pinst) { - might_sleep(); - mutex_lock(&pinst->lock); pinst->flags &= ~PADATA_INIT; mutex_unlock(&pinst->lock); } EXPORT_SYMBOL(padata_stop); -static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +#ifdef CONFIG_HOTPLUG_CPU +static int padata_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) { int err; struct padata_instance *pinst; @@ -588,7 +659,7 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, err = __padata_add_cpu(pinst, cpu); mutex_unlock(&pinst->lock); if (err) - return NOTIFY_BAD; + return notifier_from_errno(err); break; case CPU_DOWN_PREPARE: @@ -599,7 +670,7 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, err = __padata_remove_cpu(pinst, cpu); mutex_unlock(&pinst->lock); if (err) - return NOTIFY_BAD; + return notifier_from_errno(err); break; case CPU_UP_CANCELED: @@ -621,8 +692,9 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } +#endif -/* +/** * padata_alloc - allocate and initialize a padata instance * * @cpumask: cpumask that padata uses for parallelization @@ -631,7 +703,6 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, struct padata_instance *padata_alloc(const struct cpumask *cpumask, struct workqueue_struct *wq) { - int err; struct padata_instance *pinst; struct parallel_data *pd; @@ -639,6 +710,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, if (!pinst) goto err; + get_online_cpus(); + pd = padata_alloc_pd(pinst, cpumask); if (!pd) goto err_free_inst; @@ -654,31 +727,32 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask, pinst->flags = 0; +#ifdef CONFIG_HOTPLUG_CPU pinst->cpu_notifier.notifier_call = padata_cpu_callback; pinst->cpu_notifier.priority = 0; - err = register_hotcpu_notifier(&pinst->cpu_notifier); - if (err) - goto err_free_cpumask; + register_hotcpu_notifier(&pinst->cpu_notifier); +#endif + + put_online_cpus(); mutex_init(&pinst->lock); return pinst; -err_free_cpumask: - free_cpumask_var(pinst->cpumask); err_free_pd: padata_free_pd(pd); err_free_inst: kfree(pinst); + put_online_cpus(); err: return NULL; } EXPORT_SYMBOL(padata_alloc); -/* +/** * padata_free - free a padata instance * - * @ padata_inst: padata instance to free + * @padata_inst: padata instance to free */ void padata_free(struct padata_instance *pinst) { @@ -686,10 +760,13 @@ void padata_free(struct padata_instance *pinst) synchronize_rcu(); - while (atomic_read(&pinst->pd->refcnt) != 0) - yield(); - +#ifdef CONFIG_HOTPLUG_CPU unregister_hotcpu_notifier(&pinst->cpu_notifier); +#endif + get_online_cpus(); + padata_flush_queues(pinst->pd); + put_online_cpus(); + padata_free_pd(pinst->pd); free_cpumask_var(pinst->cpumask); kfree(pinst); |