diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/capability.c | 12 | ||||
-rw-r--r-- | kernel/cred.c | 12 | ||||
-rw-r--r-- | kernel/freezer.c | 4 | ||||
-rw-r--r-- | kernel/hung_task.c | 2 | ||||
-rw-r--r-- | kernel/irq/Kconfig | 4 | ||||
-rw-r--r-- | kernel/irq/Makefile | 1 | ||||
-rw-r--r-- | kernel/irq/chip.c | 3 | ||||
-rw-r--r-- | kernel/irq/debug.h | 1 | ||||
-rw-r--r-- | kernel/irq/generic-chip.c | 354 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 22 | ||||
-rw-r--r-- | kernel/irq/manage.c | 3 | ||||
-rw-r--r-- | kernel/irq/settings.h | 17 | ||||
-rw-r--r-- | kernel/kexec.c | 9 | ||||
-rw-r--r-- | kernel/kmod.c | 16 | ||||
-rw-r--r-- | kernel/lockdep.c | 206 | ||||
-rw-r--r-- | kernel/module.c | 105 | ||||
-rw-r--r-- | kernel/params.c | 23 | ||||
-rw-r--r-- | kernel/power/Kconfig | 10 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 58 | ||||
-rw-r--r-- | kernel/power/main.c | 1 | ||||
-rw-r--r-- | kernel/power/power.h | 4 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 33 | ||||
-rw-r--r-- | kernel/power/suspend.c | 14 | ||||
-rw-r--r-- | kernel/power/user.c | 5 | ||||
-rw-r--r-- | kernel/sys.c | 3 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 12 |
27 files changed, 793 insertions, 145 deletions
diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c1..32a80e08ff4 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048d..8093c16b84b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) diff --git a/kernel/freezer.c b/kernel/freezer.c index 66ecd2ead21..7b01de98bb6 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -17,7 +17,7 @@ static inline void frozen_process(void) { if (!unlikely(current->flags & PF_NOFREEZE)) { current->flags |= PF_FROZEN; - wmb(); + smp_wmb(); } clear_freeze_flag(current); } @@ -93,7 +93,7 @@ bool freeze_task(struct task_struct *p, bool sig_only) * the task as frozen and next clears its TIF_FREEZE. */ if (!freezing(p)) { - rmb(); + smp_rmb(); if (frozen(p)) return false; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 53ead174da2..ea640120ab8 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -33,7 +33,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; /* * Zero means infinite timeout - no checking done: */ -unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; +unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; unsigned long __read_mostly sysctl_hung_task_warnings = 10; diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index c574f9a12c4..d1d051b38e0 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -48,6 +48,10 @@ config IRQ_PREFLOW_FASTEOI config IRQ_EDGE_EOI_HANDLER bool +# Generic configurable interrupt chip implementation +config GENERIC_IRQ_CHIP + bool + # Support forced irq threading config IRQ_FORCED_THREADING bool diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 54329cd7b3e..73290056cfb 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,5 +1,6 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 4af1e2b244c..d5a3009da71 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -310,6 +310,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) out_unlock: raw_spin_unlock(&desc->lock); } +EXPORT_SYMBOL_GPL(handle_simple_irq); /** * handle_level_irq - Level type irq handler @@ -573,6 +574,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, if (handle != handle_bad_irq && is_chained) { irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); + irq_settings_set_nothread(desc); irq_startup(desc); } out: @@ -612,6 +614,7 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) irq_put_desc_unlock(desc, flags); } +EXPORT_SYMBOL_GPL(irq_modify_status); /** * irq_cpu_online - Invoke all irq_cpu_online functions. diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 306cba37e9a..97a8bfadc88 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -27,6 +27,7 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) P(IRQ_PER_CPU); P(IRQ_NOPROBE); P(IRQ_NOREQUEST); + P(IRQ_NOTHREAD); P(IRQ_NOAUTOEN); PS(IRQS_AUTODETECT); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c new file mode 100644 index 00000000000..31a9db71190 --- /dev/null +++ b/kernel/irq/generic-chip.c @@ -0,0 +1,354 @@ +/* + * Library implementing the most common irq chip callback functions + * + * Copyright (C) 2011, Thomas Gleixner + */ +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/syscore_ops.h> + +#include "internals.h" + +static LIST_HEAD(gc_list); +static DEFINE_RAW_SPINLOCK(gc_lock); + +static inline struct irq_chip_regs *cur_regs(struct irq_data *d) +{ + return &container_of(d->chip, struct irq_chip_type, chip)->regs; +} + +/** + * irq_gc_noop - NOOP function + * @d: irq_data + */ +void irq_gc_noop(struct irq_data *d) +{ +} + +/** + * irq_gc_mask_disable_reg - Mask chip via disable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_mask_disable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->disable); + gc->mask_cache &= ~mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via setting bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache |= mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_set_mask_bit - Mask chip via clearing bit in mask register + * @d: irq_data + * + * Chip has a single mask register. Values of this register are cached + * and protected by gc->lock + */ +void irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + gc->mask_cache &= ~mask; + irq_reg_writel(gc->mask_cache, gc->reg_base + cur_regs(d)->mask); + irq_gc_unlock(gc); +} + +/** + * irq_gc_unmask_enable_reg - Unmask chip via enable register + * @d: irq_data + * + * Chip has separate enable/disable registers instead of a single mask + * register. + */ +void irq_gc_unmask_enable_reg(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->enable); + gc->mask_cache |= mask; + irq_gc_unlock(gc); +} + +/** + * irq_gc_ack - Ack pending interrupt + * @d: irq_data + */ +void irq_gc_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_mask_disable_reg_and_ack- Mask and ack pending interrupt + * @d: irq_data + */ +void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->mask); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->ack); + irq_gc_unlock(gc); +} + +/** + * irq_gc_eoi - EOI interrupt + * @d: irq_data + */ +void irq_gc_eoi(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + irq_gc_lock(gc); + irq_reg_writel(mask, gc->reg_base + cur_regs(d)->eoi); + irq_gc_unlock(gc); +} + +/** + * irq_gc_set_wake - Set/clr wake bit for an interrupt + * @d: irq_data + * + * For chips where the wake from suspend functionality is not + * configured in a separate register and the wakeup active state is + * just stored in a bitmask. + */ +int irq_gc_set_wake(struct irq_data *d, unsigned int on) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 mask = 1 << (d->irq - gc->irq_base); + + if (!(mask & gc->wake_enabled)) + return -EINVAL; + + irq_gc_lock(gc); + if (on) + gc->wake_active |= mask; + else + gc->wake_active &= ~mask; + irq_gc_unlock(gc); + return 0; +} + +/** + * irq_alloc_generic_chip - Allocate a generic chip and initialize it + * @name: Name of the irq chip + * @num_ct: Number of irq_chip_type instances associated with this + * @irq_base: Interrupt base nr for this chip + * @reg_base: Register base address (virtual) + * @handler: Default flow handler associated with this chip + * + * Returns an initialized irq_chip_generic structure. The chip defaults + * to the primary (index 0) irq_chip_type and @handler + */ +struct irq_chip_generic * +irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, + void __iomem *reg_base, irq_flow_handler_t handler) +{ + struct irq_chip_generic *gc; + unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); + + gc = kzalloc(sz, GFP_KERNEL); + if (gc) { + raw_spin_lock_init(&gc->lock); + gc->num_ct = num_ct; + gc->irq_base = irq_base; + gc->reg_base = reg_base; + gc->chip_types->chip.name = name; + gc->chip_types->handler = handler; + } + return gc; +} + +/* + * Separate lockdep class for interrupt chip which can nest irq_desc + * lock. + */ +static struct lock_class_key irq_nested_lock_class; + +/** + * irq_setup_generic_chip - Setup a range of interrupts with a generic chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @flags: Flags for initialization + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Set up max. 32 interrupts starting from gc->irq_base. Note, this + * initializes all interrupts to the primary irq_chip_type and its + * associated handler. + */ +void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, + enum irq_gc_flags flags, unsigned int clr, + unsigned int set) +{ + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + raw_spin_lock(&gc_lock); + list_add_tail(&gc->list, &gc_list); + raw_spin_unlock(&gc_lock); + + /* Init mask cache ? */ + if (flags & IRQ_GC_INIT_MASK_CACHE) + gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); + + for (i = gc->irq_base; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + if (flags & IRQ_GC_INIT_NESTED_LOCK) + irq_set_lockdep_class(i, &irq_nested_lock_class); + + irq_set_chip_and_handler(i, &ct->chip, ct->handler); + irq_set_chip_data(i, gc); + irq_modify_status(i, clr, set); + } + gc->irq_cnt = i - gc->irq_base; +} + +/** + * irq_setup_alt_chip - Switch to alternative chip + * @d: irq_data for this interrupt + * @type Flow type to be initialized + * + * Only to be called from chip->irq_set_type() callbacks. + */ +int irq_setup_alt_chip(struct irq_data *d, unsigned int type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = gc->chip_types; + unsigned int i; + + for (i = 0; i < gc->num_ct; i++, ct++) { + if (ct->type & type) { + d->chip = &ct->chip; + irq_data_to_desc(d)->handle_irq = ct->handler; + return 0; + } + } + return -EINVAL; +} + +/** + * irq_remove_generic_chip - Remove a chip + * @gc: Generic irq chip holding all data + * @msk: Bitmask holding the irqs to initialize relative to gc->irq_base + * @clr: IRQ_* bits to clear + * @set: IRQ_* bits to set + * + * Remove up to 32 interrupts starting from gc->irq_base. + */ +void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, + unsigned int clr, unsigned int set) +{ + unsigned int i = gc->irq_base; + + raw_spin_lock(&gc_lock); + list_del(&gc->list); + raw_spin_unlock(&gc_lock); + + for (; msk; msk >>= 1, i++) { + if (!msk & 0x01) + continue; + + /* Remove handler first. That will mask the irq line */ + irq_set_handler(i, NULL); + irq_set_chip(i, &no_irq_chip); + irq_set_chip_data(i, NULL); + irq_modify_status(i, clr, set); + } +} + +#ifdef CONFIG_PM +static int irq_gc_suspend(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_suspend) + ct->chip.irq_suspend(irq_get_irq_data(gc->irq_base)); + } + return 0; +} + +static void irq_gc_resume(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_resume) + ct->chip.irq_resume(irq_get_irq_data(gc->irq_base)); + } +} +#else +#define irq_gc_suspend NULL +#define irq_gc_resume NULL +#endif + +static void irq_gc_shutdown(void) +{ + struct irq_chip_generic *gc; + + list_for_each_entry(gc, &gc_list, list) { + struct irq_chip_type *ct = gc->chip_types; + + if (ct->chip.irq_pm_shutdown) + ct->chip.irq_pm_shutdown(irq_get_irq_data(gc->irq_base)); + } +} + +static struct syscore_ops irq_gc_syscore_ops = { + .suspend = irq_gc_suspend, + .resume = irq_gc_resume, + .shutdown = irq_gc_shutdown, +}; + +static int __init irq_gc_init_ops(void) +{ + register_syscore_ops(&irq_gc_syscore_ops); + return 0; +} +device_initcall(irq_gc_init_ops); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2c039c9b938..886e80347b3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -22,7 +22,7 @@ */ static struct lock_class_key irq_desc_lock_class; -#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) +#if defined(CONFIG_SMP) static void __init init_irq_default_affinity(void) { alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); @@ -290,6 +290,22 @@ static int irq_expand_nr_irqs(unsigned int nr) #endif /* !CONFIG_SPARSE_IRQ */ +/** + * generic_handle_irq - Invoke the handler for a particular irq + * @irq: The irq number to handle + * + */ +int generic_handle_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return -EINVAL; + generic_handle_irq_desc(irq, desc); + return 0; +} +EXPORT_SYMBOL_GPL(generic_handle_irq); + /* Dynamic interrupt handling */ /** @@ -311,6 +327,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt) bitmap_clear(allocated_irqs, from, cnt); mutex_unlock(&sparse_irq_lock); } +EXPORT_SYMBOL_GPL(irq_free_descs); /** * irq_alloc_descs - allocate and initialize a range of irq descriptors @@ -351,6 +368,7 @@ err: mutex_unlock(&sparse_irq_lock); return ret; } +EXPORT_SYMBOL_GPL(irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -430,7 +448,6 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } -#ifdef CONFIG_GENERIC_HARDIRQS unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -443,4 +460,3 @@ unsigned int kstat_irqs(unsigned int irq) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } -#endif /* CONFIG_GENERIC_HARDIRQS */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 07c1611f389..f7ce0021e1c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -900,7 +900,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ new->handler = irq_nested_primary_handler; } else { - irq_setup_forced_threading(new); + if (irq_settings_can_thread(desc)) + irq_setup_forced_threading(new); } /* diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 0d91730b633..f1667833d44 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -8,6 +8,7 @@ enum { _IRQ_LEVEL = IRQ_LEVEL, _IRQ_NOPROBE = IRQ_NOPROBE, _IRQ_NOREQUEST = IRQ_NOREQUEST, + _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, @@ -20,6 +21,7 @@ enum { #define IRQ_LEVEL GOT_YOU_MORON #define IRQ_NOPROBE GOT_YOU_MORON #define IRQ_NOREQUEST GOT_YOU_MORON +#define IRQ_NOTHREAD GOT_YOU_MORON #define IRQ_NOAUTOEN GOT_YOU_MORON #define IRQ_NESTED_THREAD GOT_YOU_MORON #undef IRQF_MODIFY_MASK @@ -94,6 +96,21 @@ static inline void irq_settings_set_norequest(struct irq_desc *desc) desc->status_use_accessors |= _IRQ_NOREQUEST; } +static inline bool irq_settings_can_thread(struct irq_desc *desc) +{ + return !(desc->status_use_accessors & _IRQ_NOTHREAD); +} + +static inline void irq_settings_clr_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors &= ~_IRQ_NOTHREAD; +} + +static inline void irq_settings_set_nothread(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NOTHREAD; +} + static inline bool irq_settings_can_probe(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOPROBE); diff --git a/kernel/kexec.c b/kernel/kexec.c index 87b77de03dd..8d814cbc810 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1531,13 +1531,7 @@ int kernel_kexec(void) if (error) goto Enable_cpus; local_irq_disable(); - /* Suspend system devices */ - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; } else @@ -1553,7 +1547,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); Enable_cpus: diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591c96a..5ae0ff38425 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -245,7 +245,6 @@ static void __call_usermodehelper(struct work_struct *work) } } -#ifdef CONFIG_PM_SLEEP /* * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user @@ -301,6 +300,15 @@ void usermodehelper_enable(void) usermodehelper_disabled = 0; } +/** + * usermodehelper_is_disabled - check if new helpers are allowed to be started + */ +bool usermodehelper_is_disabled(void) +{ + return usermodehelper_disabled; +} +EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); + static void helper_lock(void) { atomic_inc(&running_helpers); @@ -312,12 +320,6 @@ static void helper_unlock(void) if (atomic_dec_and_test(&running_helpers)) wake_up(&running_helpers_waitq); } -#else /* CONFIG_PM_SLEEP */ -#define usermodehelper_disabled 0 - -static inline void helper_lock(void) {} -static inline void helper_unlock(void) {} -#endif /* CONFIG_PM_SLEEP */ /** * call_usermodehelper_setup - prepare to call a usermode helper diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 53a68956f13..63437d065ac 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -490,6 +490,18 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) usage[i] = '\0'; } +static int __print_lock_name(struct lock_class *class) +{ + char str[KSYM_NAME_LEN]; + const char *name; + + name = class->name; + if (!name) + name = __get_key_name(class->key, str); + + return printk("%s", name); +} + static void print_lock_name(struct lock_class *class) { char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS]; @@ -1053,6 +1065,56 @@ print_circular_bug_entry(struct lock_list *target, int depth) return 0; } +static void +print_circular_lock_scenario(struct held_lock *src, + struct held_lock *tgt, + struct lock_list *prt) +{ + struct lock_class *source = hlock_class(src); + struct lock_class *target = hlock_class(tgt); + struct lock_class *parent = prt->class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (parent != source) { + printk("Chain exists of:\n "); + __print_lock_name(source); + printk(" --> "); + __print_lock_name(parent); + printk(" --> "); + __print_lock_name(target); + printk("\n\n"); + } + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(parent); + printk(");\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(source); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + /* * When a circular dependency is detected, print the * header first: @@ -1096,6 +1158,7 @@ static noinline int print_circular_bug(struct lock_list *this, { struct task_struct *curr = current; struct lock_list *parent; + struct lock_list *first_parent; int depth; if (!debug_locks_off_graph_unlock() || debug_locks_silent) @@ -1109,6 +1172,7 @@ static noinline int print_circular_bug(struct lock_list *this, print_circular_bug_header(target, depth, check_src, check_tgt); parent = get_lock_parent(target); + first_parent = parent; while (parent) { print_circular_bug_entry(parent, --depth); @@ -1116,6 +1180,9 @@ static noinline int print_circular_bug(struct lock_list *this, } printk("\nother info that might help us debug this:\n\n"); + print_circular_lock_scenario(check_src, check_tgt, + first_parent); + lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); @@ -1314,7 +1381,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf, printk("\n"); if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad BFS generated tree\n", __func__); + printk("lockdep:%s bad path found in chain graph\n", __func__); break; } @@ -1325,6 +1392,62 @@ print_shortest_lock_dependencies(struct lock_list *leaf, return; } +static void +print_irq_lock_scenario(struct lock_list *safe_entry, + struct lock_list *unsafe_entry, + struct lock_class *prev_class, + struct lock_class *next_class) +{ + struct lock_class *safe_class = safe_entry->class; + struct lock_class *unsafe_class = unsafe_entry->class; + struct lock_class *middle_class = prev_class; + + if (middle_class == safe_class) + middle_class = next_class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (middle_class != unsafe_class) { + printk("Chain exists of:\n "); + __print_lock_name(safe_class); + printk(" --> "); + __print_lock_name(middle_class); + printk(" --> "); + __print_lock_name(unsafe_class); + printk("\n\n"); + } + + printk(" Possible interrupt unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(unsafe_class); + printk(");\n"); + printk(" local_irq_disable();\n"); + printk(" lock("); + __print_lock_name(safe_class); + printk(");\n"); + printk(" lock("); + __print_lock_name(middle_class); + printk(");\n"); + printk(" <Interrupt>\n"); + printk(" lock("); + __print_lock_name(safe_class); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + static int print_bad_irq_dependency(struct task_struct *curr, struct lock_list *prev_root, @@ -1376,6 +1499,9 @@ print_bad_irq_dependency(struct task_struct *curr, print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); printk("\nother info that might help us debug this:\n\n"); + print_irq_lock_scenario(backwards_entry, forwards_entry, + hlock_class(prev), hlock_class(next)); + lockdep_print_held_locks(curr); printk("\nthe dependencies between %s-irq-safe lock", irqclass); @@ -1539,6 +1665,26 @@ static inline void inc_chains(void) #endif +static void +print_deadlock_scenario(struct held_lock *nxt, + struct held_lock *prv) +{ + struct lock_class *next = hlock_class(nxt); + struct lock_class *prev = hlock_class(prv); + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0\n"); + printk(" ----\n"); + printk(" lock("); + __print_lock_name(prev); + printk(");\n"); + printk(" lock("); + __print_lock_name(next); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); + printk(" May be due to missing lock nesting notation\n\n"); +} + static int print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, struct held_lock *next) @@ -1557,6 +1703,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, print_lock(prev); printk("\nother info that might help us debug this:\n"); + print_deadlock_scenario(next, prev); lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); @@ -1826,7 +1973,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, struct list_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; struct held_lock *hlock_curr, *hlock_next; - int i, j, n, cn; + int i, j; if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return 0; @@ -1886,15 +2033,9 @@ cache_hit: } i++; chain->depth = curr->lockdep_depth + 1 - i; - cn = nr_chain_hlocks; - while (cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS) { - n = cmpxchg(&nr_chain_hlocks, cn, cn + chain->depth); - if (n == cn) - break; - cn = n; - } - if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { - chain->base = cn; + if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { + chain->base = nr_chain_hlocks; + nr_chain_hlocks += chain->depth; for (j = 0; j < chain->depth - 1; j++, i++) { int lock_id = curr->held_locks[i].class_idx - 1; chain_hlocks[chain->base + j] = lock_id; @@ -2011,6 +2152,24 @@ static void check_chain_key(struct task_struct *curr) #endif } +static void +print_usage_bug_scenario(struct held_lock *lock) +{ + struct lock_class *class = hlock_class(lock); + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0\n"); + printk(" ----\n"); + printk(" lock("); + __print_lock_name(class); + printk(");\n"); + printk(" <Interrupt>\n"); + printk(" lock("); + __print_lock_name(class); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + static int print_usage_bug(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) @@ -2039,6 +2198,8 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, print_irqtrace_events(curr); printk("\nother info that might help us debug this:\n"); + print_usage_bug_scenario(this); + lockdep_print_held_locks(curr); printk("\nstack backtrace:\n"); @@ -2073,6 +2234,10 @@ print_irq_inversion_bug(struct task_struct *curr, struct held_lock *this, int forwards, const char *irqclass) { + struct lock_list *entry = other; + struct lock_list *middle = NULL; + int depth; + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; @@ -2091,6 +2256,25 @@ print_irq_inversion_bug(struct task_struct *curr, printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); printk("\nother info that might help us debug this:\n"); + + /* Find a middle lock (if one exists) */ + depth = get_lock_depth(other); + do { + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad path found in chain graph\n", __func__); + break; + } + middle = entry; + entry = get_lock_parent(entry); + depth--; + } while (entry && entry != root && (depth >= 0)); + if (forwards) + print_irq_lock_scenario(root, other, + middle ? middle->class : root->class, other->class); + else + print_irq_lock_scenario(other, root, + middle ? middle->class : other->class, root->class); + lockdep_print_held_locks(curr); printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); diff --git a/kernel/module.c b/kernel/module.c index d5938a5c19c..22879725678 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,6 +57,7 @@ #include <linux/kmemleak.h> #include <linux/jump_label.h> #include <linux/pfn.h> +#include <linux/bsearch.h> #define CREATE_TRACE_POINTS #include <trace/events/module.h> @@ -240,23 +241,24 @@ static bool each_symbol_in_section(const struct symsearch *arr, struct module *owner, bool (*fn)(const struct symsearch *syms, struct module *owner, - unsigned int symnum, void *data), + void *data), void *data) { - unsigned int i, j; + unsigned int j; for (j = 0; j < arrsize; j++) { - for (i = 0; i < arr[j].stop - arr[j].start; i++) - if (fn(&arr[j], owner, i, data)) - return true; + if (fn(&arr[j], owner, data)) + return true; } return false; } /* Returns true as soon as fn returns true, otherwise false. */ -bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, - unsigned int symnum, void *data), void *data) +bool each_symbol_section(bool (*fn)(const struct symsearch *arr, + struct module *owner, + void *data), + void *data) { struct module *mod; static const struct symsearch arr[] = { @@ -309,7 +311,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner, } return false; } -EXPORT_SYMBOL_GPL(each_symbol); +EXPORT_SYMBOL_GPL(each_symbol_section); struct find_symbol_arg { /* Input */ @@ -323,15 +325,12 @@ struct find_symbol_arg { const struct kernel_symbol *sym; }; -static bool find_symbol_in_section(const struct symsearch *syms, - struct module *owner, - unsigned int symnum, void *data) +static bool check_symbol(const struct symsearch *syms, + struct module *owner, + unsigned int symnum, void *data) { struct find_symbol_arg *fsa = data; - if (strcmp(syms->start[symnum].name, fsa->name) != 0) - return false; - if (!fsa->gplok) { if (syms->licence == GPL_ONLY) return false; @@ -365,6 +364,30 @@ static bool find_symbol_in_section(const struct symsearch *syms, return true; } +static int cmp_name(const void *va, const void *vb) +{ + const char *a; + const struct kernel_symbol *b; + a = va; b = vb; + return strcmp(a, b->name); +} + +static bool find_symbol_in_section(const struct symsearch *syms, + struct module *owner, + void *data) +{ + struct find_symbol_arg *fsa = data; + struct kernel_symbol *sym; + + sym = bsearch(fsa->name, syms->start, syms->stop - syms->start, + sizeof(struct kernel_symbol), cmp_name); + + if (sym != NULL && check_symbol(syms, owner, sym - syms->start, data)) + return true; + + return false; +} + /* Find a symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. */ const struct kernel_symbol *find_symbol(const char *name, @@ -379,7 +402,7 @@ const struct kernel_symbol *find_symbol(const char *name, fsa.gplok = gplok; fsa.warn = warn; - if (each_symbol(find_symbol_in_section, &fsa)) { + if (each_symbol_section(find_symbol_in_section, &fsa)) { if (owner) *owner = fsa.owner; if (crc) @@ -1607,27 +1630,28 @@ static void set_section_ro_nx(void *base, } } -/* Setting memory back to RW+NX before releasing it */ -void unset_section_ro_nx(struct module *mod, void *module_region) +static void unset_module_core_ro_nx(struct module *mod) { - unsigned long total_pages; - - if (mod->module_core == module_region) { - /* Set core as NX+RW */ - total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); - set_memory_nx((unsigned long)mod->module_core, total_pages); - set_memory_rw((unsigned long)mod->module_core, total_pages); + set_page_attributes(mod->module_core + mod->core_text_size, + mod->module_core + mod->core_size, + set_memory_x); + set_page_attributes(mod->module_core, + mod->module_core + mod->core_ro_size, + set_memory_rw); +} - } else if (mod->module_init == module_region) { - /* Set init as NX+RW */ - total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); - set_memory_nx((unsigned long)mod->module_init, total_pages); - set_memory_rw((unsigned long)mod->module_init, total_pages); - } +static void unset_module_init_ro_nx(struct module *mod) +{ + set_page_attributes(mod->module_init + mod->init_text_size, + mod->module_init + mod->init_size, + set_memory_x); + set_page_attributes(mod->module_init, + mod->module_init + mod->init_ro_size, + set_memory_rw); } /* Iterate through all modules and set each module's text as RW */ -void set_all_modules_text_rw() +void set_all_modules_text_rw(void) { struct module *mod; @@ -1648,7 +1672,7 @@ void set_all_modules_text_rw() } /* Iterate through all modules and set each module's text as RO */ -void set_all_modules_text_ro() +void set_all_modules_text_ro(void) { struct module *mod; @@ -1669,7 +1693,8 @@ void set_all_modules_text_ro() } #else static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } -static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +static void unset_module_core_ro_nx(struct module *mod) { } +static void unset_module_init_ro_nx(struct module *mod) { } #endif /* Free a module, remove from lists, etc. */ @@ -1696,7 +1721,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ - unset_section_ro_nx(mod, mod->module_init); + unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1705,7 +1730,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ - unset_section_ro_nx(mod, mod->module_core); + unset_module_core_ro_nx(mod); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -2030,11 +2055,8 @@ static const struct kernel_symbol *lookup_symbol(const char *name, const struct kernel_symbol *start, const struct kernel_symbol *stop) { - const struct kernel_symbol *ks = start; - for (; ks < stop; ks++) - if (strcmp(ks->name, name) == 0) - return ks; - return NULL; + return bsearch(name, start, stop - start, + sizeof(struct kernel_symbol), cmp_name); } static int is_exported(const char *name, unsigned long value, @@ -2931,10 +2953,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif - unset_section_ro_nx(mod, mod->module_init); + unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; + mod->init_ro_size = 0; mod->init_text_size = 0; mutex_unlock(&module_mutex); diff --git a/kernel/params.c b/kernel/params.c index 7ab388a48a2..ed72e133086 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -297,21 +297,15 @@ EXPORT_SYMBOL(param_ops_charp); int param_set_bool(const char *val, const struct kernel_param *kp) { bool v; + int ret; /* No equals means "set"... */ if (!val) val = "1"; /* One of =[yYnN01] */ - switch (val[0]) { - case 'y': case 'Y': case '1': - v = true; - break; - case 'n': case 'N': case '0': - v = false; - break; - default: - return -EINVAL; - } + ret = strtobool(val, &v); + if (ret) + return ret; if (kp->flags & KPARAM_ISBOOL) *(bool *)kp->arg = v; @@ -821,15 +815,18 @@ ssize_t __modver_version_show(struct module_attribute *mattr, return sprintf(buf, "%s\n", vattr->version); } -extern struct module_version_attribute __start___modver[], __stop___modver[]; +extern const struct module_version_attribute *__start___modver[]; +extern const struct module_version_attribute *__stop___modver[]; static void __init version_sysfs_builtin(void) { - const struct module_version_attribute *vattr; + const struct module_version_attribute **p; struct module_kobject *mk; int err; - for (vattr = __start___modver; vattr < __stop___modver; vattr++) { + for (p = __start___modver; p < __stop___modver; p++) { + const struct module_version_attribute *vattr = *p; + mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 6de9a8fc341..87f4d24b55b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -125,12 +125,6 @@ config PM_DEBUG code. This is helpful when debugging and reporting PM bugs, like suspend support. -config PM_VERBOSE - bool "Verbose Power Management debugging" - depends on PM_DEBUG - ---help--- - This option enables verbose messages from the Power Management code. - config PM_ADVANCED_DEBUG bool "Extra PM attributes in sysfs for low-level debugging/testing" depends on PM_DEBUG @@ -229,3 +223,7 @@ config PM_OPP representing individual voltage domains and provides SOC implementations a ready to use framework to manage OPPs. For more information, read <file:Documentation/power/opp.txt> + +config PM_RUNTIME_CLK + def_bool y + depends on PM_RUNTIME && HAVE_CLK diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 50aae660174..f9bec56d882 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -272,12 +272,7 @@ static int create_image(int platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_FREEZE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) { printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); @@ -302,7 +297,6 @@ static int create_image(int platform_mode) Power_up: syscore_resume(); - sysdev_resume(); /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ @@ -333,20 +327,25 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { + pm_message_t msg = PMSG_RECOVER; int error; error = platform_begin(platform_mode); if (error) goto Close; + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto Complete_devices; + /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) - goto Close; + goto Complete_devices; suspend_console(); pm_restrict_gfp_mask(); - error = dpm_suspend_start(PMSG_FREEZE); + error = dpm_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -364,13 +363,17 @@ int hibernation_snapshot(int platform_mode) if (error || !in_suspend) swsusp_free(); - dpm_resume_end(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); if (error || !in_suspend) pm_restore_gfp_mask(); resume_console(); + + Complete_devices: + dpm_complete(msg); + Close: platform_end(platform_mode); return error; @@ -409,12 +412,7 @@ static int resume_target_kernel(bool platform_mode) local_irq_disable(); - error = sysdev_suspend(PMSG_QUIESCE); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (error) goto Enable_irqs; @@ -442,7 +440,6 @@ static int resume_target_kernel(bool platform_mode) touch_softlockup_watchdog(); syscore_resume(); - sysdev_resume(); Enable_irqs: local_irq_enable(); @@ -528,7 +525,6 @@ int hibernation_platform_enter(void) goto Platform_finish; local_irq_disable(); - sysdev_suspend(PMSG_HIBERNATE); syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; @@ -541,7 +537,6 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); - sysdev_resume(); local_irq_enable(); enable_nonboot_cpus(); @@ -982,10 +977,33 @@ static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *att power_attr(image_size); +static ssize_t reserved_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", reserved_size); +} + +static ssize_t reserved_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + reserved_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(reserved_size); + static struct attribute * g[] = { &disk_attr.attr, &resume_attr.attr, &image_size_attr.attr, + &reserved_size_attr.attr, NULL, }; diff --git a/kernel/power/main.c b/kernel/power/main.c index de9aef8742f..2981af4ce7c 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -337,6 +337,7 @@ static int __init pm_init(void) if (error) return error; hibernate_image_size_init(); + hibernate_reserved_size_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; diff --git a/kernel/power/power.h b/kernel/power/power.h index 03634be55f6..9a00a0a2628 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -15,6 +15,7 @@ struct swsusp_info { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ +extern void __init hibernate_reserved_size_init(void); extern void __init hibernate_image_size_init(void); #ifdef CONFIG_ARCH_HIBERNATION_HEADER @@ -55,6 +56,7 @@ extern int hibernation_platform_enter(void); #else /* !CONFIG_HIBERNATION */ +static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} #endif /* !CONFIG_HIBERNATION */ @@ -72,6 +74,8 @@ static struct kobj_attribute _name##_attr = { \ /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; +/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */ +extern unsigned long reserved_size; extern int in_suspend; extern dev_t swsusp_resume_device; extern sector_t swsusp_resume_block; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ca0aacc2487..ace55889f70 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -41,16 +41,28 @@ static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); /* + * Number of bytes to reserve for memory allocations made by device drivers + * from their ->freeze() and ->freeze_noirq() callbacks so that they don't + * cause image creation to fail (tunable via /sys/power/reserved_size). + */ +unsigned long reserved_size; + +void __init hibernate_reserved_size_init(void) +{ + reserved_size = SPARE_PAGES * PAGE_SIZE; +} + +/* * Preferred image size in bytes (tunable via /sys/power/image_size). - * When it is set to N, the image creating code will do its best to - * ensure the image size will not exceed N bytes, but if that is - * impossible, it will try to create the smallest image possible. + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. */ unsigned long image_size; void __init hibernate_image_size_init(void) { - image_size = (totalram_pages / 3) * PAGE_SIZE; + image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; } /* List of PBEs needed for restoring the pages that were allocated before @@ -1263,11 +1275,13 @@ static unsigned long minimum_image_size(unsigned long saveable) * frame in use. We also need a number of page frames to be free during * hibernation for allocations made while saving the image and for device * drivers, in case they need to allocate memory from their hibernation - * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES, - * respectively, both of which are rough estimates). To make this happen, we - * compute the total number of available page frames and allocate at least + * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough + * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through + * /sys/power/reserved_size, respectively). To make this happen, we compute the + * total number of available page frames and allocate at least * - * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES + * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) * * of them, which corresponds to the maximum size of a hibernation image. * @@ -1322,7 +1336,8 @@ int hibernate_preallocate_memory(void) count -= totalreserve_pages; /* Compute the maximum number of saveable pages to leave in memory. */ - max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; + max_size = (count - (size + PAGES_FOR_IO)) / 2 + - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); /* Compute the desired number of image pages specified by image_size. */ size = DIV_ROUND_UP(image_size, PAGE_SIZE); if (size > max_size) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8935369d503..1c41ba21541 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,19 +163,13 @@ static int suspend_enter(suspend_state_t state) arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); - error = sysdev_suspend(PMSG_SUSPEND); - if (!error) { - error = syscore_suspend(); - if (error) - sysdev_resume(); - } + error = syscore_suspend(); if (!error) { if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { error = suspend_ops->enter(state); events_check_enabled = false; } syscore_resume(); - sysdev_resume(); } arch_suspend_enable_irqs(); @@ -216,7 +210,6 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); - pm_restrict_gfp_mask(); suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { @@ -227,13 +220,12 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_test(TEST_DEVICES)) goto Recover_platform; - suspend_enter(state); + error = suspend_enter(state); Resume_devices: suspend_test_start(); dpm_resume_end(PMSG_RESUME); suspend_test_finish("resume devices"); - pm_restore_gfp_mask(); resume_console(); Close: if (suspend_ops->end) @@ -294,7 +286,9 @@ int enter_state(suspend_state_t state) goto Finish; pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); + pm_restore_gfp_mask(); Finish: pr_debug("PM: Finishing wakeup.\n"); diff --git a/kernel/power/user.c b/kernel/power/user.c index c36c3b9e8a8..7d02d33be69 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -135,8 +135,10 @@ static int snapshot_release(struct inode *inode, struct file *filp) free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); - if (data->frozen) + if (data->frozen) { + pm_restore_gfp_mask(); thaw_processes(); + } pm_notifier_call_chain(data->mode == O_RDONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); @@ -379,6 +381,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, * PM_HIBERNATION_PREPARE */ error = suspend_devices_and_enter(PM_SUSPEND_MEM); + data->ready = 0; break; case SNAPSHOT_PLATFORM_SUPPORT: diff --git a/kernel/sys.c b/kernel/sys.c index af468edf096..f0c10385f30 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -315,7 +315,6 @@ void kernel_restart_prepare(char *cmd) blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); - sysdev_shutdown(); syscore_shutdown(); } @@ -354,7 +353,6 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); @@ -374,7 +372,6 @@ void kernel_power_off(void) if (pm_power_off_prepare) pm_power_off_prepare(); disable_nonboot_cpus(); - sysdev_shutdown(); syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6519cf62d9c..0e17c10f8a9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -685,8 +685,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) /* Add clocksource to the clcoksource list */ mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } @@ -706,8 +706,8 @@ int clocksource_register(struct clocksource *cs) mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da800ffa810..723c7637e55 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -522,10 +522,11 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, */ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { + int cpu = smp_processor_id(); + /* Set it up only once ! */ if (bc->event_handler != tick_handle_oneshot_broadcast) { int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; - int cpu = smp_processor_id(); bc->event_handler = tick_handle_oneshot_broadcast; clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); @@ -551,6 +552,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_set_event(tick_next_period, 1); } else bc->next_event.tv64 = KTIME_MAX; + } else { + /* + * The first cpu which switches to oneshot mode sets + * the bit for all other cpus which are in the general + * (periodic) broadcast mask. So the bit is set and + * would prevent the first broadcast enter after this + * to program the bc device. + */ + tick_broadcast_clear_oneshot(cpu); } } |