diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 7 | ||||
-rw-r--r-- | kernel/cred.c | 2 | ||||
-rw-r--r-- | kernel/futex.c | 30 | ||||
-rw-r--r-- | kernel/hw_breakpoint.c | 2 | ||||
-rw-r--r-- | kernel/kfifo.c | 6 | ||||
-rw-r--r-- | kernel/kgdb.c | 6 | ||||
-rw-r--r-- | kernel/padata.c | 690 | ||||
-rw-r--r-- | kernel/perf_event.c | 13 | ||||
-rw-r--r-- | kernel/power/Kconfig | 19 | ||||
-rw-r--r-- | kernel/power/main.c | 31 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 4 | ||||
-rw-r--r-- | kernel/power/swap.c | 4 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 58 | ||||
-rw-r--r-- | kernel/power/user.c | 23 | ||||
-rw-r--r-- | kernel/resource.c | 44 | ||||
-rw-r--r-- | kernel/smp.c | 8 | ||||
-rw-r--r-- | kernel/softirq.c | 15 | ||||
-rw-r--r-- | kernel/softlockup.c | 15 | ||||
-rw-r--r-- | kernel/sys.c | 2 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 2 |
21 files changed, 875 insertions, 107 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 864ff75d65f..6aebdeb2aa3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o +obj-$(CONFIG_PADATA) += padata.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1fbcc748044..aa3bee56644 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2936,14 +2936,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, for_each_subsys(root, ss) { struct cgroup_subsys_state *css = ss->create(ss, cgrp); + if (IS_ERR(css)) { err = PTR_ERR(css); goto err_destroy; } init_cgroup_css(css, ss, cgrp); - if (ss->use_id) - if (alloc_css_id(ss, parent, cgrp)) + if (ss->use_id) { + err = alloc_css_id(ss, parent, cgrp); + if (err) goto err_destroy; + } /* At error, ->destroy() callback has to free assigned ID. */ } diff --git a/kernel/cred.c b/kernel/cred.c index dd76cfe5f5b..1ed8ca18790 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void) #ifdef CONFIG_KEYS new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); if (!new->tgcred) { - kfree(new); + kmem_cache_free(cred_jar, new); return NULL; } atomic_set(&new->tgcred->usage, 1); diff --git a/kernel/futex.c b/kernel/futex.c index d9b3a2228f9..e7a35f1039e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -530,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return -EINVAL; WARN_ON(!atomic_read(&pi_state->refcount)); - WARN_ON(pid && pi_state->owner && - pi_state->owner->pid != pid); + + /* + * When pi_state->owner is NULL then the owner died + * and another waiter is on the fly. pi_state->owner + * is fixed up by the task which acquires + * pi_state->rt_mutex. + * + * We do not check for pid == 0 which can happen when + * the owner died and robust_list_exit() cleared the + * TID. + */ + if (pid && pi_state->owner) { + /* + * Bail out if user space manipulated the + * futex value. + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + } atomic_inc(&pi_state->refcount); *ps = pi_state; @@ -758,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!pi_state) return -EINVAL; + /* + * If current does not own the pi_state then the futex is + * inconsistent and user space fiddled with the futex value. + */ + if (pi_state->owner != current) + return -EINVAL; + raw_spin_lock(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); @@ -1971,7 +1995,7 @@ retry_private: /* Unqueue and drop the lock */ unqueue_me_pi(&q); - goto out; + goto out_put_key; out_unlock_put_key: queue_unlock(&q, hb); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 8a5c7d55ac9..967e66143e1 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { u64 old_addr = bp->attr.bp_addr; + u64 old_len = bp->attr.bp_len; int old_type = bp->attr.bp_type; - int old_len = bp->attr.bp_len; int err = 0; perf_event_disable(bp); diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 32c5c15d750..35edbe22e9a 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) buffer = kmalloc(size, gfp_mask); if (!buffer) { - _kfifo_init(fifo, 0, 0); + _kfifo_init(fifo, NULL, 0); return -ENOMEM; } @@ -97,6 +97,7 @@ EXPORT_SYMBOL(kfifo_alloc); void kfifo_free(struct kfifo *fifo) { kfree(fifo->buffer); + _kfifo_init(fifo, NULL, 0); } EXPORT_SYMBOL(kfifo_free); @@ -349,6 +350,7 @@ EXPORT_SYMBOL(__kfifo_from_user_n); * @fifo: the fifo to be used. * @from: pointer to the data to be added. * @len: the length of the data to be added. + * @total: the actual returned data length. * * This function copies at most @len bytes from the @from into the * FIFO depending and returns -EFAULT/0. @@ -399,7 +401,7 @@ EXPORT_SYMBOL(__kfifo_to_user_n); * @fifo: the fifo to be used. * @to: where the data must be copied. * @len: the size of the destination buffer. - @ @lenout: pointer to output variable with copied data + * @lenout: pointer to output variable with copied data * * This function copies at most @len bytes from the FIFO into the * @to buffer and 0 or -EFAULT. diff --git a/kernel/kgdb.c b/kernel/kgdb.c index c7ade62e4ef..761fdd2b303 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -599,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs) /* Signal the primary CPU that we are done: */ atomic_set(&cpu_in_kgdb[cpu], 0); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); } @@ -1453,7 +1453,7 @@ acquirelock: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); @@ -1553,7 +1553,7 @@ kgdb_restore: } /* Free kgdb_active */ atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); local_irq_restore(flags); diff --git a/kernel/padata.c b/kernel/padata.c new file mode 100644 index 00000000000..6f9bcb8313d --- /dev/null +++ b/kernel/padata.c @@ -0,0 +1,690 @@ +/* + * padata.c - generic interface to process data streams in parallel + * + * Copyright (C) 2008, 2009 secunet Security Networks AG + * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/module.h> +#include <linux/cpumask.h> +#include <linux/err.h> +#include <linux/cpu.h> +#include <linux/padata.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/rcupdate.h> + +#define MAX_SEQ_NR INT_MAX - NR_CPUS +#define MAX_OBJ_NUM 10000 * NR_CPUS + +static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) +{ + int cpu, target_cpu; + + target_cpu = cpumask_first(pd->cpumask); + for (cpu = 0; cpu < cpu_index; cpu++) + target_cpu = cpumask_next(target_cpu, pd->cpumask); + + return target_cpu; +} + +static int padata_cpu_hash(struct padata_priv *padata) +{ + int cpu_index; + struct parallel_data *pd; + + pd = padata->pd; + + /* + * Hash the sequence numbers to the cpus by taking + * seq_nr mod. number of cpus in use. + */ + cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask); + + return padata_index_to_cpu(pd, cpu_index); +} + +static void padata_parallel_worker(struct work_struct *work) +{ + struct padata_queue *queue; + struct parallel_data *pd; + struct padata_instance *pinst; + LIST_HEAD(local_list); + + local_bh_disable(); + queue = container_of(work, struct padata_queue, pwork); + pd = queue->pd; + pinst = pd->pinst; + + spin_lock(&queue->parallel.lock); + list_replace_init(&queue->parallel.list, &local_list); + spin_unlock(&queue->parallel.lock); + + while (!list_empty(&local_list)) { + struct padata_priv *padata; + + padata = list_entry(local_list.next, + struct padata_priv, list); + + list_del_init(&padata->list); + + padata->parallel(padata); + } + + local_bh_enable(); +} + +/* + * padata_do_parallel - padata parallelization function + * + * @pinst: padata instance + * @padata: object to be parallelized + * @cb_cpu: cpu the serialization callback function will run on, + * must be in the cpumask of padata. + * + * The parallelization callback function will run with BHs off. + * Note: Every object which is parallelized by padata_do_parallel + * must be seen by padata_do_serial. + */ +int padata_do_parallel(struct padata_instance *pinst, + struct padata_priv *padata, int cb_cpu) +{ + int target_cpu, err; + struct padata_queue *queue; + struct parallel_data *pd; + + rcu_read_lock_bh(); + + pd = rcu_dereference(pinst->pd); + + err = 0; + if (!(pinst->flags & PADATA_INIT)) + goto out; + + err = -EBUSY; + if ((pinst->flags & PADATA_RESET)) + goto out; + + if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM) + goto out; + + err = -EINVAL; + if (!cpumask_test_cpu(cb_cpu, pd->cpumask)) + goto out; + + err = -EINPROGRESS; + atomic_inc(&pd->refcnt); + padata->pd = pd; + padata->cb_cpu = cb_cpu; + + if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr)) + atomic_set(&pd->seq_nr, -1); + + padata->seq_nr = atomic_inc_return(&pd->seq_nr); + + target_cpu = padata_cpu_hash(padata); + queue = per_cpu_ptr(pd->queue, target_cpu); + + spin_lock(&queue->parallel.lock); + list_add_tail(&padata->list, &queue->parallel.list); + spin_unlock(&queue->parallel.lock); + + queue_work_on(target_cpu, pinst->wq, &queue->pwork); + +out: + rcu_read_unlock_bh(); + + return err; +} +EXPORT_SYMBOL(padata_do_parallel); + +static struct padata_priv *padata_get_next(struct parallel_data *pd) +{ + int cpu, num_cpus, empty, calc_seq_nr; + int seq_nr, next_nr, overrun, next_overrun; + struct padata_queue *queue, *next_queue; + struct padata_priv *padata; + struct padata_list *reorder; + + empty = 0; + next_nr = -1; + next_overrun = 0; + next_queue = NULL; + + num_cpus = cpumask_weight(pd->cpumask); + + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + reorder = &queue->reorder; + + /* + * Calculate the seq_nr of the object that should be + * next in this queue. + */ + overrun = 0; + calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) + + queue->cpu_index; + + if (unlikely(calc_seq_nr > pd->max_seq_nr)) { + calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1; + overrun = 1; + } + + if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + + seq_nr = padata->seq_nr; + BUG_ON(calc_seq_nr != seq_nr); + } else { + seq_nr = calc_seq_nr; + empty++; + } + + if (next_nr < 0 || seq_nr < next_nr + || (next_overrun && !overrun)) { + next_nr = seq_nr; + next_overrun = overrun; + next_queue = queue; + } + } + + padata = NULL; + + if (empty == num_cpus) + goto out; + + reorder = &next_queue->reorder; + + if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + + if (unlikely(next_overrun)) { + for_each_cpu(cpu, pd->cpumask) { + queue = per_cpu_ptr(pd->queue, cpu); + atomic_set(&queue->num_obj, 0); + } + } + + spin_lock(&reorder->lock); + list_del_init(&padata->list); + atomic_dec(&pd->reorder_objects); + spin_unlock(&reorder->lock); + + atomic_inc(&next_queue->num_obj); + + goto out; + } + + if (next_nr % num_cpus == next_queue->cpu_index) { + padata = ERR_PTR(-ENODATA); + goto out; + } + + padata = ERR_PTR(-EINPROGRESS); +out: + return padata; +} + +static void padata_reorder(struct parallel_data *pd) +{ + struct padata_priv *padata; + struct padata_queue *queue; + struct padata_instance *pinst = pd->pinst; + +try_again: + if (!spin_trylock_bh(&pd->lock)) + goto out; + + while (1) { + padata = padata_get_next(pd); + + if (!padata || PTR_ERR(padata) == -EINPROGRESS) + break; + + if (PTR_ERR(padata) == -ENODATA) { + spin_unlock_bh(&pd->lock); + goto out; + } + + queue = per_cpu_ptr(pd->queue, padata->cb_cpu); + + spin_lock(&queue->serial.lock); + list_add_tail(&padata->list, &queue->serial.list); + spin_unlock(&queue->serial.lock); + + queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork); + } + + spin_unlock_bh(&pd->lock); + + if (atomic_read(&pd->reorder_objects)) + goto try_again; + +out: + return; +} + +static void padata_serial_worker(struct work_struct *work) +{ + struct padata_queue *queue; + struct parallel_data *pd; + LIST_HEAD(local_list); + + local_bh_disable(); + queue = container_of(work, struct padata_queue, swork); + pd = queue->pd; + + spin_lock(&queue->serial.lock); + list_replace_init(&queue->serial.list, &local_list); + spin_unlock(&queue->serial.lock); + + while (!list_empty(&local_list)) { + struct padata_priv *padata; + + padata = list_entry(local_list.next, + struct padata_priv, list); + + list_del_init(&padata->list); + + padata->serial(padata); + atomic_dec(&pd->refcnt); + } + local_bh_enable(); +} + +/* + * padata_do_serial - padata serialization function + * + * @padata: object to be serialized. + * + * padata_do_serial must be called for every parallelized object. + * The serialization callback function will run with BHs off. + */ +void padata_do_serial(struct padata_priv *padata) +{ + int cpu; + struct padata_queue *queue; + struct parallel_data *pd; + + pd = padata->pd; + + cpu = get_cpu(); + queue = per_cpu_ptr(pd->queue, cpu); + + spin_lock(&queue->reorder.lock); + atomic_inc(&pd->reorder_objects); + list_add_tail(&padata->list, &queue->reorder.list); + spin_unlock(&queue->reorder.lock); + + put_cpu(); + + padata_reorder(pd); +} +EXPORT_SYMBOL(padata_do_serial); + +static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, + const struct cpumask *cpumask) +{ + int cpu, cpu_index, num_cpus; + struct padata_queue *queue; + struct parallel_data *pd; + + cpu_index = 0; + + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); + if (!pd) + goto err; + + pd->queue = alloc_percpu(struct padata_queue); + if (!pd->queue) + goto err_free_pd; + + if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) + goto err_free_queue; + + for_each_possible_cpu(cpu) { + queue = per_cpu_ptr(pd->queue, cpu); + + queue->pd = pd; + + if (cpumask_test_cpu(cpu, cpumask) + && cpumask_test_cpu(cpu, cpu_active_mask)) { + queue->cpu_index = cpu_index; + cpu_index++; + } else + queue->cpu_index = -1; + + INIT_LIST_HEAD(&queue->reorder.list); + INIT_LIST_HEAD(&queue->parallel.list); + INIT_LIST_HEAD(&queue->serial.list); + spin_lock_init(&queue->reorder.lock); + spin_lock_init(&queue->parallel.lock); + spin_lock_init(&queue->serial.lock); + + INIT_WORK(&queue->pwork, padata_parallel_worker); + INIT_WORK(&queue->swork, padata_serial_worker); + atomic_set(&queue->num_obj, 0); + } + + cpumask_and(pd->cpumask, cpumask, cpu_active_mask); + + num_cpus = cpumask_weight(pd->cpumask); + pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; + + atomic_set(&pd->seq_nr, -1); + atomic_set(&pd->reorder_objects, 0); + atomic_set(&pd->refcnt, 0); + pd->pinst = pinst; + spin_lock_init(&pd->lock); + + return pd; + +err_free_queue: + free_percpu(pd->queue); +err_free_pd: + kfree(pd); +err: + return NULL; +} + +static void padata_free_pd(struct parallel_data *pd) +{ + free_cpumask_var(pd->cpumask); + free_percpu(pd->queue); + kfree(pd); +} + +static void padata_replace(struct padata_instance *pinst, + struct parallel_data *pd_new) +{ + struct parallel_data *pd_old = pinst->pd; + + pinst->flags |= PADATA_RESET; + + rcu_assign_pointer(pinst->pd, pd_new); + + synchronize_rcu(); + + while (atomic_read(&pd_old->refcnt) != 0) + yield(); + + flush_workqueue(pinst->wq); + + padata_free_pd(pd_old); + + pinst->flags &= ~PADATA_RESET; +} + +/* + * padata_set_cpumask - set the cpumask that padata should use + * + * @pinst: padata instance + * @cpumask: the cpumask to use + */ +int padata_set_cpumask(struct padata_instance *pinst, + cpumask_var_t cpumask) +{ + struct parallel_data *pd; + int err = 0; + + might_sleep(); + + mutex_lock(&pinst->lock); + + pd = padata_alloc_pd(pinst, cpumask); + if (!pd) { + err = -ENOMEM; + goto out; + } + + cpumask_copy(pinst->cpumask, cpumask); + + padata_replace(pinst, pd); + +out: + mutex_unlock(&pinst->lock); + + return err; +} +EXPORT_SYMBOL(padata_set_cpumask); + +static int __padata_add_cpu(struct padata_instance *pinst, int cpu) +{ + struct parallel_data *pd; + + if (cpumask_test_cpu(cpu, cpu_active_mask)) { + pd = padata_alloc_pd(pinst, pinst->cpumask); + if (!pd) + return -ENOMEM; + + padata_replace(pinst, pd); + } + + return 0; +} + +/* + * padata_add_cpu - add a cpu to the padata cpumask + * + * @pinst: padata instance + * @cpu: cpu to add + */ +int padata_add_cpu(struct padata_instance *pinst, int cpu) +{ + int err; + + might_sleep(); + + mutex_lock(&pinst->lock); + + cpumask_set_cpu(cpu, pinst->cpumask); + err = __padata_add_cpu(pinst, cpu); + + mutex_unlock(&pinst->lock); + + return err; +} +EXPORT_SYMBOL(padata_add_cpu); + +static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) +{ + struct parallel_data *pd; + + if (cpumask_test_cpu(cpu, cpu_online_mask)) { + pd = padata_alloc_pd(pinst, pinst->cpumask); + if (!pd) + return -ENOMEM; + + padata_replace(pinst, pd); + } + + return 0; +} + +/* + * padata_remove_cpu - remove a cpu from the padata cpumask + * + * @pinst: padata instance + * @cpu: cpu to remove + */ +int padata_remove_cpu(struct padata_instance *pinst, int cpu) +{ + int err; + + might_sleep(); + + mutex_lock(&pinst->lock); + + cpumask_clear_cpu(cpu, pinst->cpumask); + err = __padata_remove_cpu(pinst, cpu); + + mutex_unlock(&pinst->lock); + + return err; +} +EXPORT_SYMBOL(padata_remove_cpu); + +/* + * padata_start - start the parallel processing + * + * @pinst: padata instance to start + */ +void padata_start(struct padata_instance *pinst) +{ + might_sleep(); + + mutex_lock(&pinst->lock); + pinst->flags |= PADATA_INIT; + mutex_unlock(&pinst->lock); +} +EXPORT_SYMBOL(padata_start); + +/* + * padata_stop - stop the parallel processing + * + * @pinst: padata instance to stop + */ +void padata_stop(struct padata_instance *pinst) +{ + might_sleep(); + + mutex_lock(&pinst->lock); + pinst->flags &= ~PADATA_INIT; + mutex_unlock(&pinst->lock); +} +EXPORT_SYMBOL(padata_stop); + +static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int err; + struct padata_instance *pinst; + int cpu = (unsigned long)hcpu; + + pinst = container_of(nfb, struct padata_instance, cpu_notifier); + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + err = __padata_add_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + if (err) + return NOTIFY_BAD; + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + err = __padata_remove_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + if (err) + return NOTIFY_BAD; + break; + + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + __padata_remove_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + if (!cpumask_test_cpu(cpu, pinst->cpumask)) + break; + mutex_lock(&pinst->lock); + __padata_add_cpu(pinst, cpu); + mutex_unlock(&pinst->lock); + } + + return NOTIFY_OK; +} + +/* + * padata_alloc - allocate and initialize a padata instance + * + * @cpumask: cpumask that padata uses for parallelization + * @wq: workqueue to use for the allocated padata instance + */ +struct padata_instance *padata_alloc(const struct cpumask *cpumask, + struct workqueue_struct *wq) +{ + int err; + struct padata_instance *pinst; + struct parallel_data *pd; + + pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); + if (!pinst) + goto err; + + pd = padata_alloc_pd(pinst, cpumask); + if (!pd) + goto err_free_inst; + + rcu_assign_pointer(pinst->pd, pd); + + pinst->wq = wq; + + cpumask_copy(pinst->cpumask, cpumask); + + pinst->flags = 0; + + pinst->cpu_notifier.notifier_call = padata_cpu_callback; + pinst->cpu_notifier.priority = 0; + err = register_hotcpu_notifier(&pinst->cpu_notifier); + if (err) + goto err_free_pd; + + mutex_init(&pinst->lock); + + return pinst; + +err_free_pd: + padata_free_pd(pd); +err_free_inst: + kfree(pinst); +err: + return NULL; +} +EXPORT_SYMBOL(padata_alloc); + +/* + * padata_free - free a padata instance + * + * @ padata_inst: padata instance to free + */ +void padata_free(struct padata_instance *pinst) +{ + padata_stop(pinst); + + synchronize_rcu(); + + while (atomic_read(&pinst->pd->refcnt) != 0) + yield(); + + unregister_hotcpu_notifier(&pinst->cpu_notifier); + padata_free_pd(pinst->pd); + kfree(pinst); +} +EXPORT_SYMBOL(padata_free); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index d27746bd3a0..2ae7409bf38 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3259,8 +3259,6 @@ static void perf_event_task_output(struct perf_event *event, task_event->event_id.tid = perf_event_tid(event, task); task_event->event_id.ptid = perf_event_tid(event, current); - task_event->event_id.time = perf_clock(); - perf_output_put(&handle, task_event->event_id); perf_output_end(&handle); @@ -3268,7 +3266,7 @@ static void perf_event_task_output(struct perf_event *event, static int perf_event_task_match(struct perf_event *event) { - if (event->state != PERF_EVENT_STATE_ACTIVE) + if (event->state < PERF_EVENT_STATE_INACTIVE) return 0; if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -3300,7 +3298,7 @@ static void perf_event_task_event(struct perf_task_event *task_event) cpuctx = &get_cpu_var(perf_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); if (!ctx) - ctx = rcu_dereference(task_event->task->perf_event_ctxp); + ctx = rcu_dereference(current->perf_event_ctxp); if (ctx) perf_event_task_ctx(ctx, task_event); put_cpu_var(perf_cpu_context); @@ -3331,6 +3329,7 @@ static void perf_event_task(struct task_struct *task, /* .ppid */ /* .tid */ /* .ptid */ + .time = perf_clock(), }, }; @@ -3380,7 +3379,7 @@ static void perf_event_comm_output(struct perf_event *event, static int perf_event_comm_match(struct perf_event *event) { - if (event->state != PERF_EVENT_STATE_ACTIVE) + if (event->state < PERF_EVENT_STATE_INACTIVE) return 0; if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -3500,7 +3499,7 @@ static void perf_event_mmap_output(struct perf_event *event, static int perf_event_mmap_match(struct perf_event *event, struct perf_mmap_event *mmap_event) { - if (event->state != PERF_EVENT_STATE_ACTIVE) + if (event->state < PERF_EVENT_STATE_INACTIVE) return 0; if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -4580,7 +4579,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->type >= PERF_TYPE_MAX) return -EINVAL; - if (attr->__reserved_1 || attr->__reserved_2) + if (attr->__reserved_1) return -EINVAL; if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 91e09d3b2eb..5c36ea9d55d 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -27,6 +27,15 @@ config PM_DEBUG code. This is helpful when debugging and reporting PM bugs, like suspend support. +config PM_ADVANCED_DEBUG + bool "Extra PM attributes in sysfs for low-level debugging/testing" + depends on PM_DEBUG + default n + ---help--- + Add extra sysfs attributes allowing one to access some Power Management + fields of device objects from user space. If you are not a kernel + developer interested in debugging/testing Power Management, say "no". + config PM_VERBOSE bool "Verbose Power Management debugging" depends on PM_DEBUG @@ -85,6 +94,11 @@ config PM_SLEEP depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE default y +config PM_SLEEP_ADVANCED_DEBUG + bool + depends on PM_ADVANCED_DEBUG + default n + config SUSPEND bool "Suspend to RAM and standby" depends on PM && ARCH_SUSPEND_POSSIBLE @@ -222,3 +236,8 @@ config PM_RUNTIME and the bus type drivers of the buses the devices are on are responsible for the actual handling of the autosuspend requests and wake-up events. + +config PM_OPS + bool + depends on PM_SLEEP || PM_RUNTIME + default y diff --git a/kernel/power/main.c b/kernel/power/main.c index 0998c713905..b58800b21fc 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -44,6 +44,32 @@ int pm_notifier_call_chain(unsigned long val) == NOTIFY_BAD) ? -EINVAL : 0; } +/* If set, devices may be suspended and resumed asynchronously. */ +int pm_async_enabled = 1; + +static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", pm_async_enabled); +} + +static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val)) + return -EINVAL; + + if (val > 1) + return -EINVAL; + + pm_async_enabled = val; + return n; +} + +power_attr(pm_async); + #ifdef CONFIG_PM_DEBUG int pm_test_level = TEST_NONE; @@ -208,9 +234,12 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, #endif -#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG) +#ifdef CONFIG_PM_SLEEP + &pm_async_attr.attr, +#ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif +#endif NULL, }; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 36cb168e433..830cadecbdf 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1181,7 +1181,7 @@ static void free_unnecessary_pages(void) memory_bm_position_reset(©_bm); - while (to_free_normal > 0 && to_free_highmem > 0) { + while (to_free_normal > 0 || to_free_highmem > 0) { unsigned long pfn = memory_bm_next_pfn(©_bm); struct page *page = pfn_to_page(pfn); @@ -1500,7 +1500,7 @@ asmlinkage int swsusp_save(void) { unsigned int nr_pages, nr_highmem; - printk(KERN_INFO "PM: Creating hibernation image: \n"); + printk(KERN_INFO "PM: Creating hibernation image:\n"); drain_local_pages(NULL); nr_pages = count_data_pages(); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 09b2b0ae9e9..1d575733d4e 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -657,10 +657,6 @@ int swsusp_read(unsigned int *flags_p) struct swsusp_info *header; *flags_p = swsusp_header->flags; - if (IS_ERR(resume_bdev)) { - pr_debug("PM: Image device not initialised\n"); - return PTR_ERR(resume_bdev); - } memset(&snapshot, 0, sizeof(struct snapshot_handle)); error = snapshot_write_next(&snapshot, PAGE_SIZE); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c deleted file mode 100644 index 5b3601bd189..00000000000 --- a/kernel/power/swsusp.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * linux/kernel/power/swsusp.c - * - * This file provides code to write suspend image to swap and read it back. - * - * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu> - * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz> - * - * This file is released under the GPLv2. - * - * I'd like to thank the following people for their work: - * - * Pavel Machek <pavel@ucw.cz>: - * Modifications, defectiveness pointing, being with me at the very beginning, - * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. - * - * Steve Doddi <dirk@loth.demon.co.uk>: - * Support the possibility of hardware state restoring. - * - * Raph <grey.havens@earthling.net>: - * Support for preserving states of network devices and virtual console - * (including X and svgatextmode) - * - * Kurt Garloff <garloff@suse.de>: - * Straightened the critical function in order to prevent compilers from - * playing tricks with local variables. - * - * Andreas Mohr <a.mohr@mailto.de> - * - * Alex Badea <vampire@go.ro>: - * Fixed runaway init - * - * Rafael J. Wysocki <rjw@sisk.pl> - * Reworked the freeing of memory and the handling of swap - * - * More state savers are welcome. Especially for the scsi layer... - * - * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt - */ - -#include <linux/mm.h> -#include <linux/suspend.h> -#include <linux/spinlock.h> -#include <linux/kernel.h> -#include <linux/major.h> -#include <linux/swap.h> -#include <linux/pm.h> -#include <linux/swapops.h> -#include <linux/bootmem.h> -#include <linux/syscalls.h> -#include <linux/highmem.h> -#include <linux/time.h> -#include <linux/rbtree.h> -#include <linux/io.h> - -#include "power.h" - -int in_suspend __nosavedata = 0; diff --git a/kernel/power/user.c b/kernel/power/user.c index bf0014d6a5f..4d2289626a8 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -195,6 +195,15 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, return res; } +static void snapshot_deprecated_ioctl(unsigned int cmd) +{ + if (printk_ratelimit()) + printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " + "be removed soon, update your suspend-to-disk " + "utilities\n", + __builtin_return_address(0), cmd); +} + static long snapshot_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -246,8 +255,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, data->frozen = 0; break; - case SNAPSHOT_CREATE_IMAGE: case SNAPSHOT_ATOMIC_SNAPSHOT: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_CREATE_IMAGE: if (data->mode != O_RDONLY || !data->frozen || data->ready) { error = -EPERM; break; @@ -275,8 +285,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, data->ready = 0; break; - case SNAPSHOT_PREF_IMAGE_SIZE: case SNAPSHOT_SET_IMAGE_SIZE: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_PREF_IMAGE_SIZE: image_size = arg; break; @@ -290,15 +301,17 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = put_user(size, (loff_t __user *)arg); break; - case SNAPSHOT_AVAIL_SWAP_SIZE: case SNAPSHOT_AVAIL_SWAP: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_AVAIL_SWAP_SIZE: size = count_swap_pages(data->swap, 1); size <<= PAGE_SHIFT; error = put_user(size, (loff_t __user *)arg); break; - case SNAPSHOT_ALLOC_SWAP_PAGE: case SNAPSHOT_GET_SWAP_PAGE: + snapshot_deprecated_ioctl(cmd); + case SNAPSHOT_ALLOC_SWAP_PAGE: if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { error = -ENODEV; break; @@ -321,6 +334,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ + snapshot_deprecated_ioctl(cmd); if (!swsusp_swap_in_use()) { /* * User space encodes device types as two-byte values, @@ -362,6 +376,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ + snapshot_deprecated_ioctl(cmd); error = -EINVAL; switch (arg) { diff --git a/kernel/resource.c b/kernel/resource.c index af96c1e4b54..24e9e60c145 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -188,6 +188,36 @@ static int __release_resource(struct resource *old) return -EINVAL; } +static void __release_child_resources(struct resource *r) +{ + struct resource *tmp, *p; + resource_size_t size; + + p = r->child; + r->child = NULL; + while (p) { + tmp = p; + p = p->sibling; + + tmp->parent = NULL; + tmp->sibling = NULL; + __release_child_resources(tmp); + + printk(KERN_DEBUG "release child resource %pR\n", tmp); + /* need to restore size, and keep flags */ + size = resource_size(tmp); + tmp->start = 0; + tmp->end = size - 1; + } +} + +void release_child_resources(struct resource *r) +{ + write_lock(&resource_lock); + __release_child_resources(r); + write_unlock(&resource_lock); +} + /** * request_resource - request and reserve an I/O or memory resource * @root: root resource descriptor @@ -303,8 +333,10 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, static int find_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, - void (*alignf)(void *, struct resource *, - resource_size_t, resource_size_t), + resource_size_t (*alignf)(void *, + const struct resource *, + resource_size_t, + resource_size_t), void *alignf_data) { struct resource *this = root->child; @@ -330,7 +362,7 @@ static int find_resource(struct resource *root, struct resource *new, tmp.end = max; tmp.start = ALIGN(tmp.start, align); if (alignf) - alignf(alignf_data, &tmp, size, align); + tmp.start = alignf(alignf_data, &tmp, size, align); if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { new->start = tmp.start; new->end = tmp.start + size - 1; @@ -358,8 +390,10 @@ static int find_resource(struct resource *root, struct resource *new, int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, - void (*alignf)(void *, struct resource *, - resource_size_t, resource_size_t), + resource_size_t (*alignf)(void *, + const struct resource *, + resource_size_t, + resource_size_t), void *alignf_data) { int err; diff --git a/kernel/smp.c b/kernel/smp.c index f1040842244..9867b6bfefc 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -12,8 +12,6 @@ #include <linux/smp.h> #include <linux/cpu.h> -static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); - static struct { struct list_head queue; raw_spinlock_t lock; @@ -33,12 +31,14 @@ struct call_function_data { cpumask_var_t cpumask; }; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); + struct call_single_queue { struct list_head list; raw_spinlock_t lock; }; -static DEFINE_PER_CPU(struct call_function_data, cfd_data); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue); static int hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -256,7 +256,7 @@ void generic_smp_call_function_single_interrupt(void) } } -static DEFINE_PER_CPU(struct call_single_data, csd_data); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); /* * smp_call_function_single - Run a function on a specific CPU diff --git a/kernel/softirq.c b/kernel/softirq.c index a09502e2ef7..7c1a67ef027 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -500,22 +500,17 @@ EXPORT_SYMBOL(tasklet_kill); */ /* - * The trampoline is called when the hrtimer expires. If this is - * called from the hrtimer interrupt then we schedule the tasklet as - * the timer callback function expects to run in softirq context. If - * it's called in softirq context anyway (i.e. high resolution timers - * disabled) then the hrtimer callback is called right away. + * The trampoline is called when the hrtimer expires. It schedules a tasklet + * to run __tasklet_hrtimer_trampoline() which in turn will call the intended + * hrtimer callback, but from softirq context. */ static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) { struct tasklet_hrtimer *ttimer = container_of(timer, struct tasklet_hrtimer, timer); - if (hrtimer_is_hres_active(timer)) { - tasklet_hi_schedule(&ttimer->tasklet); - return HRTIMER_NORESTART; - } - return ttimer->function(timer); + tasklet_hi_schedule(&ttimer->tasklet); + return HRTIMER_NORESTART; } /* diff --git a/kernel/softlockup.c b/kernel/softlockup.c index d22579087e2..0d4c7898ab8 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock); static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); +static DEFINE_PER_CPU(bool, softlock_touch_sync); static int __read_mostly did_panic; int __read_mostly softlockup_thresh = 60; @@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void) } EXPORT_SYMBOL(touch_softlockup_watchdog); +void touch_softlockup_watchdog_sync(void) +{ + __raw_get_cpu_var(softlock_touch_sync) = true; + __raw_get_cpu_var(softlockup_touch_ts) = 0; +} + void touch_all_softlockup_watchdogs(void) { int cpu; @@ -118,6 +125,14 @@ void softlockup_tick(void) } if (touch_ts == 0) { + if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) { + /* + * If the time stamp was touched atomically + * make sure the scheduler tick is up to date. + */ + per_cpu(softlock_touch_sync, this_cpu) = false; + sched_clock_tick(); + } __touch_softlockup_watchdog(); return; } diff --git a/kernel/sys.c b/kernel/sys.c index 26a6b73a6b8..18bde979f34 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -222,6 +222,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) if (which > PRIO_USER || which < PRIO_PROCESS) return -EINVAL; + rcu_read_lock(); read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: @@ -267,6 +268,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) } out_unlock: read_unlock(&tasklist_lock); + rcu_read_unlock(); return retval; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7faaa32fbf4..e2ab064c6d4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -880,6 +880,7 @@ void getboottime(struct timespec *ts) set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } +EXPORT_SYMBOL_GPL(getboottime); /** * monotonic_to_bootbased - Convert the monotonic time to boot based. @@ -889,6 +890,7 @@ void monotonic_to_bootbased(struct timespec *ts) { *ts = timespec_add_safe(*ts, total_sleep_time); } +EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { |