diff options
Diffstat (limited to 'kernel')
33 files changed, 1122 insertions, 423 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index eca595e2fd5..2da48d3515e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ + notifier.o ksysfs.o sched_clock.o cred.o \ async.o range.o obj-y += groups.o diff --git a/kernel/async.c b/kernel/async.c index d5fe7af0de2..4c2843c0043 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -120,7 +120,7 @@ static void async_run_entry_fn(struct work_struct *work) struct async_entry *entry = container_of(work, struct async_entry, work); unsigned long flags; - ktime_t calltime, delta, rettime; + ktime_t uninitialized_var(calltime), delta, rettime; /* 1) move self to the running queue */ spin_lock_irqsave(&async_lock, flags); @@ -269,7 +269,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain); void async_synchronize_cookie_domain(async_cookie_t cookie, struct list_head *running) { - ktime_t starttime, delta, endtime; + ktime_t uninitialized_var(starttime), delta, endtime; if (initcall_debug && system_state == SYSTEM_BOOTING) { printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); diff --git a/kernel/cred.c b/kernel/cred.c index 8ef31f53c44..bb55d052d85 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -644,6 +644,9 @@ void __init cred_init(void) */ struct cred *prepare_kernel_cred(struct task_struct *daemon) { +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred; +#endif const struct cred *old; struct cred *new; @@ -651,6 +654,14 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; +#ifdef CONFIG_KEYS + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) { + kmem_cache_free(cred_jar, new); + return NULL; + } +#endif + kdebug("prepare_kernel_cred() alloc %p", new); if (daemon) @@ -667,8 +678,11 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) get_group_info(new->group_info); #ifdef CONFIG_KEYS - atomic_inc(&init_tgcred.usage); - new->tgcred = &init_tgcred; + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + tgcred->process_keyring = NULL; + tgcred->session_keyring = NULL; + new->tgcred = tgcred; new->request_key_auth = NULL; new->thread_keyring = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; diff --git a/kernel/freezer.c b/kernel/freezer.c index 7b01de98bb6..66a594e8ad2 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -67,7 +67,7 @@ static void fake_signal_wake_up(struct task_struct *p) unsigned long flags; spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, 0); + signal_wake_up(p, 1); spin_unlock_irqrestore(&p->sighand->siglock, flags); } diff --git a/kernel/futex.c b/kernel/futex.c index 11cbe052b2e..1511dff0cfd 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -854,7 +854,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) { struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; - u32 curval, newval; + u32 uninitialized_var(curval), newval; if (!pi_state) return -EINVAL; @@ -916,7 +916,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) static int unlock_futex_pi(u32 __user *uaddr, u32 uval) { - u32 oldval; + u32 uninitialized_var(oldval); /* * There is no waiter, so we unlock the futex. The owner died @@ -1576,7 +1576,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; struct task_struct *oldowner = pi_state->owner; - u32 uval, curval, newval; + u32 uval, uninitialized_var(curval), newval; int ret; /* Owner died? */ @@ -1793,7 +1793,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * * Returns: * 0 - uaddr contains val and hb has been locked - * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked + * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked */ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, struct futex_q *q, struct futex_hash_bucket **hb) @@ -2481,7 +2481,7 @@ err_unlock: */ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { - u32 uval, nval, mval; + u32 uval, uninitialized_var(nval), mval; retry: if (get_user(uval, uaddr)) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index d5a3009da71..dc5114b4c16 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc) desc->depth = 1; if (desc->irq_data.chip->irq_shutdown) desc->irq_data.chip->irq_shutdown(&desc->irq_data); - if (desc->irq_data.chip->irq_disable) + else if (desc->irq_data.chip->irq_disable) desc->irq_data.chip->irq_disable(&desc->irq_data); else desc->irq_data.chip->irq_mask(&desc->irq_data); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d5828da3fd3..b57a3776de4 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -29,7 +29,11 @@ void irq_domain_add(struct irq_domain *domain) */ for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) { d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq)); - if (d || d->domain) { + if (!d) { + WARN(1, "error: assigning domain to non existant irq_desc"); + return; + } + if (d->domain) { /* things are broken; just report, don't clean up */ WARN(1, "error: irq_desc already assigned to a domain"); return; diff --git a/kernel/kmod.c b/kernel/kmod.c index ddc7644c130..a4bea97c75b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -114,10 +114,12 @@ int __request_module(bool wait, const char *fmt, ...) atomic_inc(&kmod_concurrent); if (atomic_read(&kmod_concurrent) > max_modprobes) { /* We may be blaming an innocent here, but unlikely */ - if (kmod_loop_msg++ < 5) + if (kmod_loop_msg < 5) { printk(KERN_ERR "request_module: runaway loop modprobe %s\n", module_name); + kmod_loop_msg++; + } atomic_dec(&kmod_concurrent); return -ENOMEM; } diff --git a/kernel/params.c b/kernel/params.c index 22df3e0d142..821788947e4 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -67,20 +67,27 @@ static void maybe_kfree_parameter(void *param) } } -static inline char dash2underscore(char c) +static char dash2underscore(char c) { if (c == '-') return '_'; return c; } -static inline int parameq(const char *input, const char *paramname) +bool parameqn(const char *a, const char *b, size_t n) { - unsigned int i; - for (i = 0; dash2underscore(input[i]) == paramname[i]; i++) - if (input[i] == '\0') - return 1; - return 0; + size_t i; + + for (i = 0; i < n; i++) { + if (dash2underscore(a[i]) != dash2underscore(b[i])) + return false; + } + return true; +} + +bool parameq(const char *a, const char *b) +{ + return parameqn(a, b, strlen(a)+1); } static int parse_one(char *param, diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 41440cca55a..e7cb76dc18f 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) do { times->utime = cputime_add(times->utime, t->utime); times->stime = cputime_add(times->stime, t->stime); - times->sum_exec_runtime += t->se.sum_exec_runtime; + times->sum_exec_runtime += task_sched_runtime(t); } while_each_thread(tsk, t); out: rcu_read_unlock(); @@ -274,9 +274,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) struct task_cputime sum; unsigned long flags; - raw_spin_lock_irqsave(&cputimer->lock, flags); if (!cputimer->running) { - cputimer->running = 1; /* * The POSIX timer interface allows for absolute time expiry * values through the TIMER_ABSTIME flag, therefore we have @@ -284,8 +282,11 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) * it. */ thread_group_cputime(tsk, &sum); + raw_spin_lock_irqsave(&cputimer->lock, flags); + cputimer->running = 1; update_gt_cputime(&cputimer->cputime, &sum); - } + } else + raw_spin_lock_irqsave(&cputimer->lock, flags); *times = cputimer->cputime; raw_spin_unlock_irqrestore(&cputimer->lock, flags); } @@ -312,7 +313,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock, cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = thread_group_sched_runtime(p); + thread_group_cputime(p, &cputime); + cpu->sched = cputime.sum_exec_runtime; break; } return 0; diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 3744c594b19..cedd9982306 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -27,6 +27,7 @@ config HIBERNATION select HIBERNATE_CALLBACKS select LZO_COMPRESS select LZO_DECOMPRESS + select CRC32 ---help--- Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the @@ -65,6 +66,9 @@ config HIBERNATION For more information take a look at <file:Documentation/power/swsusp.txt>. +config ARCH_SAVE_PAGE_KEYS + bool + config PM_STD_PARTITION string "Default resume partition" depends on HIBERNATION diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c5ebc6a9064..07e0e28ffba 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,8 +1,8 @@ ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG -obj-$(CONFIG_PM) += main.o -obj-$(CONFIG_PM_SLEEP) += console.o +obj-$(CONFIG_PM) += main.o qos.o +obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o diff --git a/kernel/power/console.c b/kernel/power/console.c index 218e5af9015..b1dc456474b 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -1,5 +1,5 @@ /* - * drivers/power/process.c - Functions for saving/restoring console. + * Functions for saving/restoring console. * * Originally from swsusp. */ @@ -10,7 +10,6 @@ #include <linux/module.h> #include "power.h" -#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE) #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) static int orig_fgconsole, orig_kmsg; @@ -32,4 +31,3 @@ void pm_restore_console(void) vt_kmsg_redirect(orig_kmsg); } } -#endif diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8f7b1db1ece..1c53f7fad5f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -14,6 +14,7 @@ #include <linux/reboot.h> #include <linux/string.h> #include <linux/device.h> +#include <linux/async.h> #include <linux/kmod.h> #include <linux/delay.h> #include <linux/fs.h> @@ -29,12 +30,14 @@ #include "power.h" -static int nocompress = 0; -static int noresume = 0; +static int nocompress; +static int noresume; +static int resume_wait; +static int resume_delay; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; sector_t swsusp_resume_block; -int in_suspend __nosavedata = 0; +int in_suspend __nosavedata; enum { HIBERNATION_INVALID, @@ -334,13 +337,17 @@ int hibernation_snapshot(int platform_mode) if (error) goto Close; - error = dpm_prepare(PMSG_FREEZE); - if (error) - goto Complete_devices; - /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) + goto Close; + + error = freeze_kernel_threads(); + if (error) + goto Close; + + error = dpm_prepare(PMSG_FREEZE); + if (error) goto Complete_devices; suspend_console(); @@ -463,7 +470,7 @@ static int resume_target_kernel(bool platform_mode) * @platform_mode: If set, use platform driver to prepare for the transition. * * This routine must be called with pm_mutex held. If it is successful, control - * reappears in the restored target kernel in hibernation_snaphot(). + * reappears in the restored target kernel in hibernation_snapshot(). */ int hibernation_restore(int platform_mode) { @@ -650,6 +657,9 @@ int hibernate(void) flags |= SF_PLATFORM_MODE; if (nocompress) flags |= SF_NOCOMPRESS_MODE; + else + flags |= SF_CRC32_MODE; + pr_debug("PM: writing image.\n"); error = swsusp_write(flags); swsusp_free(); @@ -724,6 +734,12 @@ static int software_resume(void) pr_debug("PM: Checking hibernation image partition %s\n", resume_file); + if (resume_delay) { + printk(KERN_INFO "Waiting %dsec before reading resume device...\n", + resume_delay); + ssleep(resume_delay); + } + /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { @@ -732,6 +748,13 @@ static int software_resume(void) * to wait for this to finish. */ wait_for_device_probe(); + + if (resume_wait) { + while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0) + msleep(10); + async_synchronize_full(); + } + /* * We can't depend on SCSI devices being available after loading * one of their modules until scsi_complete_async_scans() is @@ -1060,7 +1083,21 @@ static int __init noresume_setup(char *str) return 1; } +static int __init resumewait_setup(char *str) +{ + resume_wait = 1; + return 1; +} + +static int __init resumedelay_setup(char *str) +{ + resume_delay = simple_strtoul(str, NULL, 0); + return 1; +} + __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); __setup("hibernate=", hibernate_setup); +__setup("resumewait", resumewait_setup); +__setup("resumedelay=", resumedelay_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c index 6c601f87196..a52e88425a3 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -12,6 +12,8 @@ #include <linux/string.h> #include <linux/resume-trace.h> #include <linux/workqueue.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include "power.h" @@ -131,6 +133,101 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_test); #endif /* CONFIG_PM_DEBUG */ +#ifdef CONFIG_DEBUG_FS +static char *suspend_step_name(enum suspend_stat_step step) +{ + switch (step) { + case SUSPEND_FREEZE: + return "freeze"; + case SUSPEND_PREPARE: + return "prepare"; + case SUSPEND_SUSPEND: + return "suspend"; + case SUSPEND_SUSPEND_NOIRQ: + return "suspend_noirq"; + case SUSPEND_RESUME_NOIRQ: + return "resume_noirq"; + case SUSPEND_RESUME: + return "resume"; + default: + return ""; + } +} + +static int suspend_stats_show(struct seq_file *s, void *unused) +{ + int i, index, last_dev, last_errno, last_step; + + last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; + last_dev %= REC_FAILED_NUM; + last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; + last_errno %= REC_FAILED_NUM; + last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; + last_step %= REC_FAILED_NUM; + seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n" + "%s: %d\n%s: %d\n%s: %d\n%s: %d\n", + "success", suspend_stats.success, + "fail", suspend_stats.fail, + "failed_freeze", suspend_stats.failed_freeze, + "failed_prepare", suspend_stats.failed_prepare, + "failed_suspend", suspend_stats.failed_suspend, + "failed_suspend_noirq", + suspend_stats.failed_suspend_noirq, + "failed_resume", suspend_stats.failed_resume, + "failed_resume_noirq", + suspend_stats.failed_resume_noirq); + seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", + suspend_stats.failed_devs[last_dev]); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_dev + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-s\n", + suspend_stats.failed_devs[index]); + } + seq_printf(s, " last_failed_errno:\t%-d\n", + suspend_stats.errno[last_errno]); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_errno + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-d\n", + suspend_stats.errno[index]); + } + seq_printf(s, " last_failed_step:\t%-s\n", + suspend_step_name( + suspend_stats.failed_steps[last_step])); + for (i = 1; i < REC_FAILED_NUM; i++) { + index = last_step + REC_FAILED_NUM - i; + index %= REC_FAILED_NUM; + seq_printf(s, "\t\t\t%-s\n", + suspend_step_name( + suspend_stats.failed_steps[index])); + } + + return 0; +} + +static int suspend_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, suspend_stats_show, NULL); +} + +static const struct file_operations suspend_stats_operations = { + .open = suspend_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init pm_debugfs_init(void) +{ + debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, + NULL, NULL, &suspend_stats_operations); + return 0; +} + +late_initcall(pm_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ + #endif /* CONFIG_PM_SLEEP */ struct kobject *power_kobj; @@ -194,6 +291,11 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, } if (state < PM_SUSPEND_MAX && *s) error = enter_state(state); + if (error) { + suspend_stats.fail++; + dpm_save_failed_errno(error); + } else + suspend_stats.success++; #endif Exit: diff --git a/kernel/power/power.h b/kernel/power/power.h index 9a00a0a2628..23a2db1ec44 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -146,6 +146,7 @@ extern int swsusp_swap_in_use(void); */ #define SF_PLATFORM_MODE 1 #define SF_NOCOMPRESS_MODE 2 +#define SF_CRC32_MODE 4 /* kernel/power/hibernate.c */ extern int swsusp_check(void); @@ -228,7 +229,8 @@ extern int pm_test_level; #ifdef CONFIG_SUSPEND_FREEZER static inline int suspend_freeze_processes(void) { - return freeze_processes(); + int error = freeze_processes(); + return error ? : freeze_kernel_threads(); } static inline void suspend_thaw_processes(void) diff --git a/kernel/power/process.c b/kernel/power/process.c index 0cf3a27a6c9..addbbe5531b 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -135,7 +135,7 @@ static int try_to_freeze_tasks(bool sig_only) } /** - * freeze_processes - tell processes to enter the refrigerator + * freeze_processes - Signal user space processes to enter the refrigerator. */ int freeze_processes(void) { @@ -143,20 +143,30 @@ int freeze_processes(void) printk("Freezing user space processes ... "); error = try_to_freeze_tasks(true); - if (error) - goto Exit; - printk("done.\n"); + if (!error) { + printk("done."); + oom_killer_disable(); + } + printk("\n"); + BUG_ON(in_atomic()); + + return error; +} + +/** + * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + */ +int freeze_kernel_threads(void) +{ + int error; printk("Freezing remaining freezable tasks ... "); error = try_to_freeze_tasks(false); - if (error) - goto Exit; - printk("done."); + if (!error) + printk("done."); - oom_killer_disable(); - Exit: - BUG_ON(in_atomic()); printk("\n"); + BUG_ON(in_atomic()); return error; } diff --git a/kernel/pm_qos_params.c b/kernel/power/qos.c index 37f05d0f079..1c1797dd1d1 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/power/qos.c @@ -29,7 +29,7 @@ /*#define DEBUG*/ -#include <linux/pm_qos_params.h> +#include <linux/pm_qos.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> @@ -45,62 +45,57 @@ #include <linux/uaccess.h> /* - * locking rule: all changes to requests or notifiers lists + * locking rule: all changes to constraints or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ -enum pm_qos_type { - PM_QOS_MAX, /* return the largest value */ - PM_QOS_MIN /* return the smallest value */ -}; - -/* - * Note: The lockless read path depends on the CPU accessing - * target_value atomically. Atomic access is only guaranteed on all CPU - * types linux supports for 32 bit quantites - */ struct pm_qos_object { - struct plist_head requests; - struct blocking_notifier_head *notifiers; + struct pm_qos_constraints *constraints; struct miscdevice pm_qos_power_miscdev; char *name; - s32 target_value; /* Do not change to 64 bit */ - s32 default_value; - enum pm_qos_type type; }; static DEFINE_SPINLOCK(pm_qos_lock); static struct pm_qos_object null_pm_qos; + static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); -static struct pm_qos_object cpu_dma_pm_qos = { - .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests), - .notifiers = &cpu_dma_lat_notifier, - .name = "cpu_dma_latency", +static struct pm_qos_constraints cpu_dma_constraints = { + .list = PLIST_HEAD_INIT(cpu_dma_constraints.list), .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, + .notifiers = &cpu_dma_lat_notifier, +}; +static struct pm_qos_object cpu_dma_pm_qos = { + .constraints = &cpu_dma_constraints, }; static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); -static struct pm_qos_object network_lat_pm_qos = { - .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests), - .notifiers = &network_lat_notifier, - .name = "network_latency", +static struct pm_qos_constraints network_lat_constraints = { + .list = PLIST_HEAD_INIT(network_lat_constraints.list), .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, - .type = PM_QOS_MIN + .type = PM_QOS_MIN, + .notifiers = &network_lat_notifier, +}; +static struct pm_qos_object network_lat_pm_qos = { + .constraints = &network_lat_constraints, + .name = "network_latency", }; static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); -static struct pm_qos_object network_throughput_pm_qos = { - .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests), - .notifiers = &network_throughput_notifier, - .name = "network_throughput", +static struct pm_qos_constraints network_tput_constraints = { + .list = PLIST_HEAD_INIT(network_tput_constraints.list), .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, + .notifiers = &network_throughput_notifier, +}; +static struct pm_qos_object network_throughput_pm_qos = { + .constraints = &network_tput_constraints, + .name = "network_throughput", }; @@ -127,17 +122,17 @@ static const struct file_operations pm_qos_power_fops = { }; /* unlocked internal variant */ -static inline int pm_qos_get_value(struct pm_qos_object *o) +static inline int pm_qos_get_value(struct pm_qos_constraints *c) { - if (plist_head_empty(&o->requests)) - return o->default_value; + if (plist_head_empty(&c->list)) + return c->default_value; - switch (o->type) { + switch (c->type) { case PM_QOS_MIN: - return plist_first(&o->requests)->prio; + return plist_first(&c->list)->prio; case PM_QOS_MAX: - return plist_last(&o->requests)->prio; + return plist_last(&c->list)->prio; default: /* runtime check for not using enum */ @@ -145,69 +140,73 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } -static inline s32 pm_qos_read_value(struct pm_qos_object *o) +s32 pm_qos_read_value(struct pm_qos_constraints *c) { - return o->target_value; + return c->target_value; } -static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { - o->target_value = value; + c->target_value = value; } -static void update_target(struct pm_qos_object *o, struct plist_node *node, - int del, int value) +/** + * pm_qos_update_target - manages the constraints list and calls the notifiers + * if needed + * @c: constraints data struct + * @node: request to add to the list, to update or to remove + * @action: action to take on the constraints list + * @value: value of the request to add or update + * + * This function returns 1 if the aggregated constraint value has changed, 0 + * otherwise. + */ +int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, + enum pm_qos_req_action action, int value) { unsigned long flags; - int prev_value, curr_value; + int prev_value, curr_value, new_value; spin_lock_irqsave(&pm_qos_lock, flags); - prev_value = pm_qos_get_value(o); - /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */ - if (value != PM_QOS_DEFAULT_VALUE) { + prev_value = pm_qos_get_value(c); + if (value == PM_QOS_DEFAULT_VALUE) + new_value = c->default_value; + else + new_value = value; + + switch (action) { + case PM_QOS_REMOVE_REQ: + plist_del(node, &c->list); + break; + case PM_QOS_UPDATE_REQ: /* * to change the list, we atomically remove, reinit * with new value and add, then see if the extremal * changed */ - plist_del(node, &o->requests); - plist_node_init(node, value); - plist_add(node, &o->requests); - } else if (del) { - plist_del(node, &o->requests); - } else { - plist_add(node, &o->requests); + plist_del(node, &c->list); + case PM_QOS_ADD_REQ: + plist_node_init(node, new_value); + plist_add(node, &c->list); + break; + default: + /* no action */ + ; } - curr_value = pm_qos_get_value(o); - pm_qos_set_value(o, curr_value); + + curr_value = pm_qos_get_value(c); + pm_qos_set_value(c, curr_value); + spin_unlock_irqrestore(&pm_qos_lock, flags); - if (prev_value != curr_value) - blocking_notifier_call_chain(o->notifiers, + if (prev_value != curr_value) { + blocking_notifier_call_chain(c->notifiers, (unsigned long)curr_value, NULL); -} - -static int register_pm_qos_misc(struct pm_qos_object *qos) -{ - qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; - qos->pm_qos_power_miscdev.name = qos->name; - qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; - - return misc_register(&qos->pm_qos_power_miscdev); -} - -static int find_pm_qos_object_by_minor(int minor) -{ - int pm_qos_class; - - for (pm_qos_class = 0; - pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { - if (minor == - pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) - return pm_qos_class; + return 1; + } else { + return 0; } - return -1; } /** @@ -218,11 +217,11 @@ static int find_pm_qos_object_by_minor(int minor) */ int pm_qos_request(int pm_qos_class) { - return pm_qos_read_value(pm_qos_array[pm_qos_class]); + return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints); } EXPORT_SYMBOL_GPL(pm_qos_request); -int pm_qos_request_active(struct pm_qos_request_list *req) +int pm_qos_request_active(struct pm_qos_request *req) { return req->pm_qos_class != 0; } @@ -230,40 +229,36 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active); /** * pm_qos_add_request - inserts new qos request into the list - * @dep: pointer to a preallocated handle + * @req: pointer to a preallocated handle * @pm_qos_class: identifies which list of qos request to use * @value: defines the qos request * * This function inserts a new entry in the pm_qos_class list of requested qos * performance characteristics. It recomputes the aggregate QoS expectations - * for the pm_qos_class of parameters and initializes the pm_qos_request_list + * for the pm_qos_class of parameters and initializes the pm_qos_request * handle. Caller needs to save this handle for later use in updates and * removal. */ -void pm_qos_add_request(struct pm_qos_request_list *dep, +void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value) { - struct pm_qos_object *o = pm_qos_array[pm_qos_class]; - int new_value; + if (!req) /*guard against callers passing in null */ + return; - if (pm_qos_request_active(dep)) { + if (pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n"); return; } - if (value == PM_QOS_DEFAULT_VALUE) - new_value = o->default_value; - else - new_value = value; - plist_node_init(&dep->list, new_value); - dep->pm_qos_class = pm_qos_class; - update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE); + req->pm_qos_class = pm_qos_class; + pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, + &req->node, PM_QOS_ADD_REQ, value); } EXPORT_SYMBOL_GPL(pm_qos_add_request); /** * pm_qos_update_request - modifies an existing qos request - * @pm_qos_req : handle to list element holding a pm_qos request to use + * @req : handle to list element holding a pm_qos request to use * @value: defines the qos request * * Updates an existing qos request for the pm_qos_class of parameters along @@ -271,56 +266,47 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request); * * Attempts are made to make this code callable on hot code paths. */ -void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req, +void pm_qos_update_request(struct pm_qos_request *req, s32 new_value) { - s32 temp; - struct pm_qos_object *o; - - if (!pm_qos_req) /*guard against callers passing in null */ + if (!req) /*guard against callers passing in null */ return; - if (!pm_qos_request_active(pm_qos_req)) { + if (!pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n"); return; } - o = pm_qos_array[pm_qos_req->pm_qos_class]; - - if (new_value == PM_QOS_DEFAULT_VALUE) - temp = o->default_value; - else - temp = new_value; - - if (temp != pm_qos_req->list.prio) - update_target(o, &pm_qos_req->list, 0, temp); + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); /** * pm_qos_remove_request - modifies an existing qos request - * @pm_qos_req: handle to request list element + * @req: handle to request list element * - * Will remove pm qos request from the list of requests and + * Will remove pm qos request from the list of constraints and * recompute the current target value for the pm_qos_class. Call this * on slow code paths. */ -void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req) +void pm_qos_remove_request(struct pm_qos_request *req) { - struct pm_qos_object *o; - - if (pm_qos_req == NULL) + if (!req) /*guard against callers passing in null */ return; /* silent return to keep pcm code cleaner */ - if (!pm_qos_request_active(pm_qos_req)) { + if (!pm_qos_request_active(req)) { WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n"); return; } - o = pm_qos_array[pm_qos_req->pm_qos_class]; - update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE); - memset(pm_qos_req, 0, sizeof(*pm_qos_req)); + pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_REMOVE_REQ, + PM_QOS_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(pm_qos_remove_request); @@ -337,7 +323,8 @@ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) int retval; retval = blocking_notifier_chain_register( - pm_qos_array[pm_qos_class]->notifiers, notifier); + pm_qos_array[pm_qos_class]->constraints->notifiers, + notifier); return retval; } @@ -356,19 +343,43 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier) int retval; retval = blocking_notifier_chain_unregister( - pm_qos_array[pm_qos_class]->notifiers, notifier); + pm_qos_array[pm_qos_class]->constraints->notifiers, + notifier); return retval; } EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); +/* User space interface to PM QoS classes via misc devices */ +static int register_pm_qos_misc(struct pm_qos_object *qos) +{ + qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; + qos->pm_qos_power_miscdev.name = qos->name; + qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; + + return misc_register(&qos->pm_qos_power_miscdev); +} + +static int find_pm_qos_object_by_minor(int minor) +{ + int pm_qos_class; + + for (pm_qos_class = 0; + pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) { + if (minor == + pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor) + return pm_qos_class; + } + return -1; +} + static int pm_qos_power_open(struct inode *inode, struct file *filp) { long pm_qos_class; pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); if (pm_qos_class >= 0) { - struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL); + struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; @@ -383,7 +394,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) static int pm_qos_power_release(struct inode *inode, struct file *filp) { - struct pm_qos_request_list *req; + struct pm_qos_request *req; req = filp->private_data; pm_qos_remove_request(req); @@ -398,17 +409,15 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, { s32 value; unsigned long flags; - struct pm_qos_object *o; - struct pm_qos_request_list *pm_qos_req = filp->private_data; + struct pm_qos_request *req = filp->private_data; - if (!pm_qos_req) + if (!req) return -EINVAL; - if (!pm_qos_request_active(pm_qos_req)) + if (!pm_qos_request_active(req)) return -EINVAL; - o = pm_qos_array[pm_qos_req->pm_qos_class]; spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(o); + value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); @@ -418,7 +427,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { s32 value; - struct pm_qos_request_list *pm_qos_req; + struct pm_qos_request *req; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) @@ -449,8 +458,8 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, return -EINVAL; } - pm_qos_req = filp->private_data; - pm_qos_update_request(pm_qos_req, value); + req = filp->private_data; + pm_qos_update_request(req, value); return count; } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 06efa54f93d..cbe2c144139 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1339,6 +1339,9 @@ int hibernate_preallocate_memory(void) count += highmem; count -= totalreserve_pages; + /* Add number of pages required for page keys (s390 only). */ + size += page_key_additional_pages(saveable); + /* Compute the maximum number of saveable pages to leave in memory. */ max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); @@ -1662,6 +1665,8 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm) buf[j] = memory_bm_next_pfn(bm); if (unlikely(buf[j] == BM_END_OF_MAP)) break; + /* Save page key for data page (s390 only). */ + page_key_read(buf + j); } } @@ -1821,6 +1826,9 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) if (unlikely(buf[j] == BM_END_OF_MAP)) break; + /* Extract and buffer page key for data page (s390 only). */ + page_key_memorize(buf + j); + if (memory_bm_pfn_present(bm, buf[j])) memory_bm_set_bit(bm, buf[j]); else @@ -2223,6 +2231,11 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + /* Allocate buffer for page keys. */ + error = page_key_alloc(nr_copy_pages); + if (error) + return error; + } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm); if (error) @@ -2243,6 +2256,8 @@ int snapshot_write_next(struct snapshot_handle *handle) } } else { copy_last_highmem_page(); + /* Restore page key for data page (s390 only). */ + page_key_write(handle->buffer); handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); @@ -2264,6 +2279,9 @@ int snapshot_write_next(struct snapshot_handle *handle) void snapshot_write_finalize(struct snapshot_handle *handle) { copy_last_highmem_page(); + /* Restore page key for data page (s390 only). */ + page_key_write(handle->buffer); + page_key_free(); /* Free only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index b6b71ad2208..fdd4263b995 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -104,7 +104,10 @@ static int suspend_prepare(void) goto Finish; error = suspend_freeze_processes(); - if (!error) + if (error) { + suspend_stats.failed_freeze++; + dpm_save_failed_step(SUSPEND_FREEZE); + } else return 0; suspend_thaw_processes(); @@ -315,8 +318,16 @@ int enter_state(suspend_state_t state) */ int pm_suspend(suspend_state_t state) { - if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) - return enter_state(state); + int ret; + if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) { + ret = enter_state(state); + if (ret) { + suspend_stats.fail++; + dpm_save_failed_errno(ret); + } else + suspend_stats.success++; + return ret; + } return -EINVAL; } EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 7c97c3a0eee..11a594c4ba2 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -27,6 +27,10 @@ #include <linux/slab.h> #include <linux/lzo.h> #include <linux/vmalloc.h> +#include <linux/cpumask.h> +#include <linux/atomic.h> +#include <linux/kthread.h> +#include <linux/crc32.h> #include "power.h" @@ -43,8 +47,7 @@ * allocated and populated one at a time, so we only need one memory * page to set up the entire structure. * - * During resume we also only need to use one swap_map_page structure - * at a time. + * During resume we pick up all swap_map_page structures into a list. */ #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) @@ -54,6 +57,11 @@ struct swap_map_page { sector_t next_swap; }; +struct swap_map_page_list { + struct swap_map_page *map; + struct swap_map_page_list *next; +}; + /** * The swap_map_handle structure is used for handling swap in * a file-alike way @@ -61,13 +69,18 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; + struct swap_map_page_list *maps; sector_t cur_swap; sector_t first_sector; unsigned int k; + unsigned long nr_free_pages, written; + u32 crc32; }; struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) - + sizeof(u32)]; + u32 crc32; sector_t image; unsigned int flags; /* Flags to pass to the "boot" kernel */ char orig_sig[10]; @@ -199,6 +212,8 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); swsusp_header->image = handle->first_sector; swsusp_header->flags = flags; + if (flags & SF_CRC32_MODE) + swsusp_header->crc32 = handle->crc32; error = hib_bio_write_page(swsusp_resume_block, swsusp_header, NULL); } else { @@ -245,6 +260,7 @@ static int swsusp_swap_check(void) static int write_page(void *buf, sector_t offset, struct bio **bio_chain) { void *src; + int ret; if (!offset) return -ENOSPC; @@ -254,9 +270,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) if (src) { copy_page(src, buf); } else { - WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ - src = buf; + ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + if (ret) + return ret; + src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (src) { + copy_page(src, buf); + } else { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + src = buf; + } } } else { src = buf; @@ -293,6 +317,8 @@ static int get_swap_writer(struct swap_map_handle *handle) goto err_rel; } handle->k = 0; + handle->nr_free_pages = nr_free_pages() >> 1; + handle->written = 0; handle->first_sector = handle->cur_swap; return 0; err_rel: @@ -316,20 +342,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, return error; handle->cur->entries[handle->k++] = offset; if (handle->k >= MAP_PAGE_ENTRIES) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; offset = alloc_swapdev_block(root_swap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap, NULL); + error = write_page(handle->cur, handle->cur_swap, bio_chain); if (error) goto out; clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; } + if (bio_chain && ++handle->written > handle->nr_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + handle->written = 0; + } out: return error; } @@ -372,6 +401,13 @@ static int swap_writer_finish(struct swap_map_handle *handle, LZO_HEADER, PAGE_SIZE) #define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) +/* Maximum number of threads for compression/decompression. */ +#define LZO_THREADS 3 + +/* Maximum number of pages for read buffering. */ +#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) + + /** * save_image - save the suspend image data */ @@ -419,6 +455,92 @@ static int save_image(struct swap_map_handle *handle, return ret; } +/** + * Structure used for CRC32. + */ +struct crc_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + unsigned run_threads; /* nr current threads */ + wait_queue_head_t go; /* start crc update */ + wait_queue_head_t done; /* crc update done */ + u32 *crc32; /* points to handle's crc32 */ + size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */ + unsigned char *unc[LZO_THREADS]; /* uncompressed data */ +}; + +/** + * CRC32 update function that runs in its own thread. + */ +static int crc32_threadfn(void *data) +{ + struct crc_data *d = data; + unsigned i; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + for (i = 0; i < d->run_threads; i++) + *d->crc32 = crc32_le(*d->crc32, + d->unc[i], *d->unc_len[i]); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} +/** + * Structure used for LZO data compression. + */ +struct cmp_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + int ret; /* return code */ + wait_queue_head_t go; /* start compression */ + wait_queue_head_t done; /* compression done */ + size_t unc_len; /* uncompressed length */ + size_t cmp_len; /* compressed length */ + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ + unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */ +}; + +/** + * Compression function that runs in its own thread. + */ +static int lzo_compress_threadfn(void *data) +{ + struct cmp_data *d = data; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + d->ret = lzo1x_1_compress(d->unc, d->unc_len, + d->cmp + LZO_HEADER, &d->cmp_len, + d->wrk); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; +} /** * save_image_lzo - Save the suspend image data compressed with LZO. @@ -437,42 +559,93 @@ static int save_image_lzo(struct swap_map_handle *handle, struct bio *bio; struct timeval start; struct timeval stop; - size_t off, unc_len, cmp_len; - unsigned char *unc, *cmp, *wrk, *page; + size_t off; + unsigned thr, run_threads, nr_threads; + unsigned char *page = NULL; + struct cmp_data *data = NULL; + struct crc_data *crc = NULL; + + /* + * We'll limit the number of threads for compression to limit memory + * footprint. + */ + nr_threads = num_online_cpus() - 1; + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out_clean; } - wrk = vmalloc(LZO1X_1_MEM_COMPRESS); - if (!wrk) { - printk(KERN_ERR "PM: Failed to allocate LZO workspace\n"); - free_page((unsigned long)page); - return -ENOMEM; + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct cmp_data, go)); - unc = vmalloc(LZO_UNC_SIZE); - if (!unc) { - printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); - vfree(wrk); - free_page((unsigned long)page); - return -ENOMEM; + crc = kmalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + printk(KERN_ERR "PM: Failed to allocate crc\n"); + ret = -ENOMEM; + goto out_clean; + } + memset(crc, 0, offsetof(struct crc_data, go)); + + /* + * Start the compression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + + data[thr].thr = kthread_run(lzo_compress_threadfn, + &data[thr], + "image_compress/%u", thr); + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start compression threads\n"); + ret = -ENOMEM; + goto out_clean; + } } - cmp = vmalloc(LZO_CMP_SIZE); - if (!cmp) { - printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); - vfree(unc); - vfree(wrk); - free_page((unsigned long)page); - return -ENOMEM; + /* + * Adjust number of free pages after all allocations have been done. + * We don't want to run out of pages when writing. + */ + handle->nr_free_pages = nr_free_pages() >> 1; + + /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); + init_waitqueue_head(&crc->done); + + handle->crc32 = 0; + crc->crc32 = &handle->crc32; + for (thr = 0; thr < nr_threads; thr++) { + crc->unc[thr] = data[thr].unc; + crc->unc_len[thr] = &data[thr].unc_len; + } + + crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); + if (IS_ERR(crc->thr)) { + crc->thr = NULL; + printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); + ret = -ENOMEM; + goto out_clean; } printk(KERN_INFO + "PM: Using %u thread(s) for compression.\n" "PM: Compressing and saving image data (%u pages) ... ", - nr_to_write); + nr_threads, nr_to_write); m = nr_to_write / 100; if (!m) m = 1; @@ -480,55 +653,83 @@ static int save_image_lzo(struct swap_map_handle *handle, bio = NULL; do_gettimeofday(&start); for (;;) { - for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { - ret = snapshot_read_next(snapshot); - if (ret < 0) - goto out_finish; - - if (!ret) + for (thr = 0; thr < nr_threads; thr++) { + for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { + ret = snapshot_read_next(snapshot); + if (ret < 0) + goto out_finish; + + if (!ret) + break; + + memcpy(data[thr].unc + off, + data_of(*snapshot), PAGE_SIZE); + + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", + nr_pages / m); + nr_pages++; + } + if (!off) break; - memcpy(unc + off, data_of(*snapshot), PAGE_SIZE); + data[thr].unc_len = off; - if (!(nr_pages % m)) - printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; + atomic_set(&data[thr].ready, 1); + wake_up(&data[thr].go); } - if (!off) + if (!thr) break; - unc_len = off; - ret = lzo1x_1_compress(unc, unc_len, - cmp + LZO_HEADER, &cmp_len, wrk); - if (ret < 0) { - printk(KERN_ERR "PM: LZO compression failed\n"); - break; - } + crc->run_threads = thr; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); - if (unlikely(!cmp_len || - cmp_len > lzo1x_worst_compress(unc_len))) { - printk(KERN_ERR "PM: Invalid LZO compressed length\n"); - ret = -1; - break; - } + for (run_threads = thr, thr = 0; thr < run_threads; thr++) { + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); - *(size_t *)cmp = cmp_len; + ret = data[thr].ret; - /* - * Given we are writing one page at a time to disk, we copy - * that much from the buffer, although the last bit will likely - * be smaller than full page. This is OK - we saved the length - * of the compressed data, so any garbage at the end will be - * discarded when we read it. - */ - for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { - memcpy(page, cmp + off, PAGE_SIZE); + if (ret < 0) { + printk(KERN_ERR "PM: LZO compression failed\n"); + goto out_finish; + } - ret = swap_write_page(handle, page, &bio); - if (ret) + if (unlikely(!data[thr].cmp_len || + data[thr].cmp_len > + lzo1x_worst_compress(data[thr].unc_len))) { + printk(KERN_ERR + "PM: Invalid LZO compressed length\n"); + ret = -1; goto out_finish; + } + + *(size_t *)data[thr].cmp = data[thr].cmp_len; + + /* + * Given we are writing one page at a time to disk, we + * copy that much from the buffer, although the last + * bit will likely be smaller than full page. This is + * OK - we saved the length of the compressed data, so + * any garbage at the end will be discarded when we + * read it. + */ + for (off = 0; + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(page, data[thr].cmp + off, PAGE_SIZE); + + ret = swap_write_page(handle, page, &bio); + if (ret) + goto out_finish; + } } + + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); } out_finish: @@ -536,16 +737,25 @@ out_finish: do_gettimeofday(&stop); if (!ret) ret = err2; - if (!ret) + if (!ret) { printk(KERN_CONT "\b\b\b\bdone\n"); - else + } else { printk(KERN_CONT "\n"); + } swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); - - vfree(cmp); - vfree(unc); - vfree(wrk); - free_page((unsigned long)page); +out_clean: + if (crc) { + if (crc->thr) + kthread_stop(crc->thr); + kfree(crc); + } + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) free_page((unsigned long)page); return ret; } @@ -625,8 +835,15 @@ out_finish: static void release_swap_reader(struct swap_map_handle *handle) { - if (handle->cur) - free_page((unsigned long)handle->cur); + struct swap_map_page_list *tmp; + + while (handle->maps) { + if (handle->maps->map) + free_page((unsigned long)handle->maps->map); + tmp = handle->maps; + handle->maps = handle->maps->next; + kfree(tmp); + } handle->cur = NULL; } @@ -634,22 +851,46 @@ static int get_swap_reader(struct swap_map_handle *handle, unsigned int *flags_p) { int error; + struct swap_map_page_list *tmp, *last; + sector_t offset; *flags_p = swsusp_header->flags; if (!swsusp_header->image) /* how can this happen? */ return -EINVAL; - handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); - if (!handle->cur) - return -ENOMEM; + handle->cur = NULL; + last = handle->maps = NULL; + offset = swsusp_header->image; + while (offset) { + tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL); + if (!tmp) { + release_swap_reader(handle); + return -ENOMEM; + } + memset(tmp, 0, sizeof(*tmp)); + if (!handle->maps) + handle->maps = tmp; + if (last) + last->next = tmp; + last = tmp; + + tmp->map = (struct swap_map_page *) + __get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!tmp->map) { + release_swap_reader(handle); + return -ENOMEM; + } - error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL); - if (error) { - release_swap_reader(handle); - return error; + error = hib_bio_read_page(offset, tmp->map, NULL); + if (error) { + release_swap_reader(handle); + return error; + } + offset = tmp->map->next_swap; } handle->k = 0; + handle->cur = handle->maps->map; return 0; } @@ -658,6 +899,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, { sector_t offset; int error; + struct swap_map_page_list *tmp; if (!handle->cur) return -EINVAL; @@ -668,13 +910,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { - error = hib_wait_on_bio_chain(bio_chain); handle->k = 0; - offset = handle->cur->next_swap; - if (!offset) + free_page((unsigned long)handle->maps->map); + tmp = handle->maps; + handle->maps = handle->maps->next; + kfree(tmp); + if (!handle->maps) release_swap_reader(handle); - else if (!error) - error = hib_bio_read_page(offset, handle->cur, NULL); + else + handle->cur = handle->maps->map; } return error; } @@ -697,7 +941,7 @@ static int load_image(struct swap_map_handle *handle, unsigned int nr_to_read) { unsigned int m; - int error = 0; + int ret = 0; struct timeval start; struct timeval stop; struct bio *bio; @@ -713,15 +957,15 @@ static int load_image(struct swap_map_handle *handle, bio = NULL; do_gettimeofday(&start); for ( ; ; ) { - error = snapshot_write_next(snapshot); - if (error <= 0) + ret = snapshot_write_next(snapshot); + if (ret <= 0) break; - error = swap_read_page(handle, data_of(*snapshot), &bio); - if (error) + ret = swap_read_page(handle, data_of(*snapshot), &bio); + if (ret) break; if (snapshot->sync_read) - error = hib_wait_on_bio_chain(&bio); - if (error) + ret = hib_wait_on_bio_chain(&bio); + if (ret) break; if (!(nr_pages % m)) printk("\b\b\b\b%3d%%", nr_pages / m); @@ -729,17 +973,61 @@ static int load_image(struct swap_map_handle *handle, } err2 = hib_wait_on_bio_chain(&bio); do_gettimeofday(&stop); - if (!error) - error = err2; - if (!error) { + if (!ret) + ret = err2; + if (!ret) { printk("\b\b\b\bdone\n"); snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) - error = -ENODATA; + ret = -ENODATA; } else printk("\n"); swsusp_show_speed(&start, &stop, nr_to_read, "Read"); - return error; + return ret; +} + +/** + * Structure used for LZO data decompression. + */ +struct dec_data { + struct task_struct *thr; /* thread */ + atomic_t ready; /* ready to start flag */ + atomic_t stop; /* ready to stop flag */ + int ret; /* return code */ + wait_queue_head_t go; /* start decompression */ + wait_queue_head_t done; /* decompression done */ + size_t unc_len; /* uncompressed length */ + size_t cmp_len; /* compressed length */ + unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */ + unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */ +}; + +/** + * Deompression function that runs in its own thread. + */ +static int lzo_decompress_threadfn(void *data) +{ + struct dec_data *d = data; + + while (1) { + wait_event(d->go, atomic_read(&d->ready) || + kthread_should_stop()); + if (kthread_should_stop()) { + d->thr = NULL; + d->ret = -1; + atomic_set(&d->stop, 1); + wake_up(&d->done); + break; + } + atomic_set(&d->ready, 0); + + d->unc_len = LZO_UNC_SIZE; + d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len, + d->unc, &d->unc_len); + atomic_set(&d->stop, 1); + wake_up(&d->done); + } + return 0; } /** @@ -753,50 +1041,120 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned int nr_to_read) { unsigned int m; - int error = 0; + int ret = 0; + int eof = 0; struct bio *bio; struct timeval start; struct timeval stop; unsigned nr_pages; - size_t i, off, unc_len, cmp_len; - unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; - - for (i = 0; i < LZO_CMP_PAGES; i++) { - page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); - if (!page[i]) { - printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + size_t off; + unsigned i, thr, run_threads, nr_threads; + unsigned ring = 0, pg = 0, ring_size = 0, + have = 0, want, need, asked = 0; + unsigned long read_pages; + unsigned char **page = NULL; + struct dec_data *data = NULL; + struct crc_data *crc = NULL; + + /* + * We'll limit the number of threads for decompression to limit memory + * footprint. + */ + nr_threads = num_online_cpus() - 1; + nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); + + page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + ret = -ENOMEM; + goto out_clean; + } - while (i) - free_page((unsigned long)page[--i]); + data = vmalloc(sizeof(*data) * nr_threads); + if (!data) { + printk(KERN_ERR "PM: Failed to allocate LZO data\n"); + ret = -ENOMEM; + goto out_clean; + } + for (thr = 0; thr < nr_threads; thr++) + memset(&data[thr], 0, offsetof(struct dec_data, go)); - return -ENOMEM; + crc = kmalloc(sizeof(*crc), GFP_KERNEL); + if (!crc) { + printk(KERN_ERR "PM: Failed to allocate crc\n"); + ret = -ENOMEM; + goto out_clean; + } + memset(crc, 0, offsetof(struct crc_data, go)); + + /* + * Start the decompression threads. + */ + for (thr = 0; thr < nr_threads; thr++) { + init_waitqueue_head(&data[thr].go); + init_waitqueue_head(&data[thr].done); + + data[thr].thr = kthread_run(lzo_decompress_threadfn, + &data[thr], + "image_decompress/%u", thr); + if (IS_ERR(data[thr].thr)) { + data[thr].thr = NULL; + printk(KERN_ERR + "PM: Cannot start decompression threads\n"); + ret = -ENOMEM; + goto out_clean; } } - unc = vmalloc(LZO_UNC_SIZE); - if (!unc) { - printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); - - for (i = 0; i < LZO_CMP_PAGES; i++) - free_page((unsigned long)page[i]); - - return -ENOMEM; + /* + * Start the CRC32 thread. + */ + init_waitqueue_head(&crc->go); + init_waitqueue_head(&crc->done); + + handle->crc32 = 0; + crc->crc32 = &handle->crc32; + for (thr = 0; thr < nr_threads; thr++) { + crc->unc[thr] = data[thr].unc; + crc->unc_len[thr] = &data[thr].unc_len; } - cmp = vmalloc(LZO_CMP_SIZE); - if (!cmp) { - printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); + crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32"); + if (IS_ERR(crc->thr)) { + crc->thr = NULL; + printk(KERN_ERR "PM: Cannot start CRC32 thread\n"); + ret = -ENOMEM; + goto out_clean; + } - vfree(unc); - for (i = 0; i < LZO_CMP_PAGES; i++) - free_page((unsigned long)page[i]); + /* + * Adjust number of pages for read buffering, in case we are short. + */ + read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; + read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); - return -ENOMEM; + for (i = 0; i < read_pages; i++) { + page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? + __GFP_WAIT | __GFP_HIGH : + __GFP_WAIT); + if (!page[i]) { + if (i < LZO_CMP_PAGES) { + ring_size = i; + printk(KERN_ERR + "PM: Failed to allocate LZO pages\n"); + ret = -ENOMEM; + goto out_clean; + } else { + break; + } + } } + want = ring_size = i; printk(KERN_INFO + "PM: Using %u thread(s) for decompression.\n" "PM: Loading and decompressing image data (%u pages) ... ", - nr_to_read); + nr_threads, nr_to_read); m = nr_to_read / 100; if (!m) m = 1; @@ -804,85 +1162,189 @@ static int load_image_lzo(struct swap_map_handle *handle, bio = NULL; do_gettimeofday(&start); - error = snapshot_write_next(snapshot); - if (error <= 0) + ret = snapshot_write_next(snapshot); + if (ret <= 0) goto out_finish; - for (;;) { - error = swap_read_page(handle, page[0], NULL); /* sync */ - if (error) - break; - - cmp_len = *(size_t *)page[0]; - if (unlikely(!cmp_len || - cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { - printk(KERN_ERR "PM: Invalid LZO compressed length\n"); - error = -1; - break; + for(;;) { + for (i = 0; !eof && i < want; i++) { + ret = swap_read_page(handle, page[ring], &bio); + if (ret) { + /* + * On real read error, finish. On end of data, + * set EOF flag and just exit the read loop. + */ + if (handle->cur && + handle->cur->entries[handle->k]) { + goto out_finish; + } else { + eof = 1; + break; + } + } + if (++ring >= ring_size) + ring = 0; } + asked += i; + want -= i; - for (off = PAGE_SIZE, i = 1; - off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { - error = swap_read_page(handle, page[i], &bio); - if (error) + /* + * We are out of data, wait for some more. + */ + if (!have) { + if (!asked) + break; + + ret = hib_wait_on_bio_chain(&bio); + if (ret) goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; } - error = hib_wait_on_bio_chain(&bio); /* need all data now */ - if (error) - goto out_finish; - - for (off = 0, i = 0; - off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { - memcpy(cmp + off, page[i], PAGE_SIZE); + if (crc->run_threads) { + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + crc->run_threads = 0; } - unc_len = LZO_UNC_SIZE; - error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len, - unc, &unc_len); - if (error < 0) { - printk(KERN_ERR "PM: LZO decompression failed\n"); - break; + for (thr = 0; have && thr < nr_threads; thr++) { + data[thr].cmp_len = *(size_t *)page[pg]; + if (unlikely(!data[thr].cmp_len || + data[thr].cmp_len > + lzo1x_worst_compress(LZO_UNC_SIZE))) { + printk(KERN_ERR + "PM: Invalid LZO compressed length\n"); + ret = -1; + goto out_finish; + } + + need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER, + PAGE_SIZE); + if (need > have) { + if (eof > 1) { + ret = -1; + goto out_finish; + } + break; + } + + for (off = 0; + off < LZO_HEADER + data[thr].cmp_len; + off += PAGE_SIZE) { + memcpy(data[thr].cmp + off, + page[pg], PAGE_SIZE); + have--; + want++; + if (++pg >= ring_size) + pg = 0; + } + + atomic_set(&data[thr].ready, 1); + wake_up(&data[thr].go); } - if (unlikely(!unc_len || - unc_len > LZO_UNC_SIZE || - unc_len & (PAGE_SIZE - 1))) { - printk(KERN_ERR "PM: Invalid LZO uncompressed length\n"); - error = -1; - break; + /* + * Wait for more data while we are decompressing. + */ + if (have < LZO_CMP_PAGES && asked) { + ret = hib_wait_on_bio_chain(&bio); + if (ret) + goto out_finish; + have += asked; + asked = 0; + if (eof) + eof = 2; } - for (off = 0; off < unc_len; off += PAGE_SIZE) { - memcpy(data_of(*snapshot), unc + off, PAGE_SIZE); + for (run_threads = thr, thr = 0; thr < run_threads; thr++) { + wait_event(data[thr].done, + atomic_read(&data[thr].stop)); + atomic_set(&data[thr].stop, 0); + + ret = data[thr].ret; - if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; + if (ret < 0) { + printk(KERN_ERR + "PM: LZO decompression failed\n"); + goto out_finish; + } - error = snapshot_write_next(snapshot); - if (error <= 0) + if (unlikely(!data[thr].unc_len || + data[thr].unc_len > LZO_UNC_SIZE || + data[thr].unc_len & (PAGE_SIZE - 1))) { + printk(KERN_ERR + "PM: Invalid LZO uncompressed length\n"); + ret = -1; goto out_finish; + } + + for (off = 0; + off < data[thr].unc_len; off += PAGE_SIZE) { + memcpy(data_of(*snapshot), + data[thr].unc + off, PAGE_SIZE); + + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + + ret = snapshot_write_next(snapshot); + if (ret <= 0) { + crc->run_threads = thr + 1; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); + goto out_finish; + } + } } + + crc->run_threads = thr; + atomic_set(&crc->ready, 1); + wake_up(&crc->go); } out_finish: + if (crc->run_threads) { + wait_event(crc->done, atomic_read(&crc->stop)); + atomic_set(&crc->stop, 0); + } do_gettimeofday(&stop); - if (!error) { + if (!ret) { printk("\b\b\b\bdone\n"); snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) - error = -ENODATA; + ret = -ENODATA; + if (!ret) { + if (swsusp_header->flags & SF_CRC32_MODE) { + if(handle->crc32 != swsusp_header->crc32) { + printk(KERN_ERR + "PM: Invalid image CRC32!\n"); + ret = -ENODATA; + } + } + } } else printk("\n"); swsusp_show_speed(&start, &stop, nr_to_read, "Read"); - - vfree(cmp); - vfree(unc); - for (i = 0; i < LZO_CMP_PAGES; i++) +out_clean: + for (i = 0; i < ring_size; i++) free_page((unsigned long)page[i]); + if (crc) { + if (crc->thr) + kthread_stop(crc->thr); + kfree(crc); + } + if (data) { + for (thr = 0; thr < nr_threads; thr++) + if (data[thr].thr) + kthread_stop(data[thr].thr); + vfree(data); + } + if (page) vfree(page); - return error; + return ret; } /** diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9de3ecfd20f..a70d2a5d8c7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -744,20 +744,17 @@ int ptrace_request(struct task_struct *child, long request, break; si = child->last_siginfo; - if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP)) - break; - - child->jobctl |= JOBCTL_LISTENING; - - /* - * If NOTIFY is set, it means event happened between start - * of this trap and now. Trigger re-trap immediately. - */ - if (child->jobctl & JOBCTL_TRAP_NOTIFY) - signal_wake_up(child, true); - + if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) { + child->jobctl |= JOBCTL_LISTENING; + /* + * If NOTIFY is set, it means event happened between + * start of this trap and now. Trigger re-trap. + */ + if (child->jobctl & JOBCTL_TRAP_NOTIFY) + signal_wake_up(child, true); + ret = 0; + } unlock_task_sighand(child, &flags); - ret = 0; break; case PTRACE_DETACH: /* detach a process that was attached. */ diff --git a/kernel/resource.c b/kernel/resource.c index 3b3cedc5259..c8dc249da5c 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -419,6 +419,9 @@ static int __find_resource(struct resource *root, struct resource *old, else tmp.end = root->end; + if (tmp.end < tmp.start) + goto next; + resource_clip(&tmp, constraint->min, constraint->max); arch_remove_reservations(&tmp); @@ -436,8 +439,10 @@ static int __find_resource(struct resource *root, struct resource *old, return 0; } } - if (!this) + +next: if (!this || this->end == root->end) break; + if (this != old) tmp.start = this->end + 1; this = this->sibling; diff --git a/kernel/sched.c b/kernel/sched.c index ec5f472bc5b..8aa00803c1e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1739,7 +1739,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) #ifdef CONFIG_SMP /* * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be - * successfuly executed on another CPU. We must ensure that updates of + * successfully executed on another CPU. We must ensure that updates of * per-task data have been completed by this moment. */ smp_wmb(); @@ -3725,30 +3725,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) } /* - * Return sum_exec_runtime for the thread group. - * In case the task is currently running, return the sum plus current's - * pending runtime that have not been accounted yet. - * - * Note that the thread group might have other running tasks as well, - * so the return value not includes other pending runtime that other - * running tasks might have. - */ -unsigned long long thread_group_sched_runtime(struct task_struct *p) -{ - struct task_cputime totals; - unsigned long flags; - struct rq *rq; - u64 ns; - - rq = task_rq_lock(p, &flags); - thread_group_cputime(p, &totals); - ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); - - return ns; -} - -/* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in user space since the last update @@ -4372,7 +4348,7 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } -asmlinkage void schedule(void) +asmlinkage void __sched schedule(void) { struct task_struct *tsk = current; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 97540f0c9e4..af1177858be 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1050,7 +1050,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) */ if (curr && unlikely(rt_task(curr)) && (curr->rt.nr_cpus_allowed < 2 || - curr->prio < p->prio) && + curr->prio <= p->prio) && (p->rt.nr_cpus_allowed > 1)) { int target = find_lowest_rq(p); @@ -1581,7 +1581,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) p->rt.nr_cpus_allowed > 1 && rt_task(rq->curr) && (rq->curr->rt.nr_cpus_allowed < 2 || - rq->curr->prio < p->prio)) + rq->curr->prio <= p->prio)) push_rt_tasks(rq); } diff --git a/kernel/signal.c b/kernel/signal.c index 291c9700be7..d252be2d3de 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1344,13 +1344,24 @@ int kill_proc_info(int sig, struct siginfo *info, pid_t pid) return error; } +static int kill_as_cred_perm(const struct cred *cred, + struct task_struct *target) +{ + const struct cred *pcred = __task_cred(target); + if (cred->user_ns != pcred->user_ns) + return 0; + if (cred->euid != pcred->suid && cred->euid != pcred->uid && + cred->uid != pcred->suid && cred->uid != pcred->uid) + return 0; + return 1; +} + /* like kill_pid_info(), but doesn't use uid/euid of "current" */ -int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, - uid_t uid, uid_t euid, u32 secid) +int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid, + const struct cred *cred, u32 secid) { int ret = -EINVAL; struct task_struct *p; - const struct cred *pcred; unsigned long flags; if (!valid_signal(sig)) @@ -1362,10 +1373,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, ret = -ESRCH; goto out_unlock; } - pcred = __task_cred(p); - if (si_fromuser(info) && - euid != pcred->suid && euid != pcred->uid && - uid != pcred->suid && uid != pcred->uid) { + if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) { ret = -EPERM; goto out_unlock; } @@ -1384,7 +1392,7 @@ out_unlock: rcu_read_unlock(); return ret; } -EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); +EXPORT_SYMBOL_GPL(kill_pid_info_as_cred); /* * kill_something_info() interprets pid in interesting ways just like kill(2). diff --git a/kernel/sys.c b/kernel/sys.c index 18ee1d2f647..58459509b14 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1172,7 +1172,7 @@ DECLARE_RWSEM(uts_sem); static int override_release(char __user *release, int len) { int ret = 0; - char buf[len]; + char buf[65]; if (current->personality & UNAME26) { char *rest = UTS_RELEASE; @@ -1759,6 +1759,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, sizeof(me->comm) - 1) < 0) return -EFAULT; set_task_comm(me, comm); + proc_comm_connector(me); return 0; case PR_GET_NAME: get_task_comm(comm, me); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index e8bffbe2ba4..6318b511afa 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -214,7 +214,7 @@ static const struct bin_table bin_net_ipv4_route_table[] = { { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" }, { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" }, { CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" }, - { CTL_INT, NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" }, + /* NET_IPV4_ROUTE_GC_INTERVAL "gc_interval" no longer used */ { CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" }, { CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" }, { CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" }, diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e19ce1454ee..e66046456f4 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = { .cmd = TASKSTATS_CMD_GET, .doit = taskstats_user_cmd, .policy = taskstats_cmd_get_policy, + .flags = GENL_ADMIN_PERM, }; static struct genl_ops cgroupstats_ops = { diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 761c510a06c..f49405f842f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,9 @@ endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o +ifeq ($(CONFIG_PM_RUNTIME),y) +obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o +endif ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c new file mode 100644 index 00000000000..4b3b5eaf94d --- /dev/null +++ b/kernel/trace/rpm-traces.c @@ -0,0 +1,20 @@ +/* + * Power trace points + * + * Copyright (C) 2009 Ming Lei <ming.lei@canonical.com> + */ + +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/usb.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/rpm.h> + +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int); +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend); +EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 24dc60d9fa1..5bbfac85866 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) #define KB 1024 #define MB (1024*KB) +#define KB_MASK (~(KB-1)) /* * fill in extended accounting fields */ @@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; mmput(mm); } - stats->read_char = p->ioac.rchar; - stats->write_char = p->ioac.wchar; - stats->read_syscalls = p->ioac.syscr; - stats->write_syscalls = p->ioac.syscw; + stats->read_char = p->ioac.rchar & KB_MASK; + stats->write_char = p->ioac.wchar & KB_MASK; + stats->read_syscalls = p->ioac.syscr & KB_MASK; + stats->write_syscalls = p->ioac.syscw & KB_MASK; #ifdef CONFIG_TASK_IO_ACCOUNTING - stats->read_bytes = p->ioac.read_bytes; - stats->write_bytes = p->ioac.write_bytes; - stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; + stats->read_bytes = p->ioac.read_bytes & KB_MASK; + stats->write_bytes = p->ioac.write_bytes & KB_MASK; + stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK; #else stats->read_bytes = 0; stats->write_bytes = 0; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 25fb1b0e53f..1783aabc612 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2412,8 +2412,13 @@ reflush: for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + bool drained; - if (!cwq->nr_active && list_empty(&cwq->delayed_works)) + spin_lock_irq(&cwq->gcwq->lock); + drained = !cwq->nr_active && list_empty(&cwq->delayed_works); + spin_unlock_irq(&cwq->gcwq->lock); + + if (drained) continue; if (++flush_cnt == 10 || |