summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/cgroup.c45
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/fork.c41
-rw-r--r--kernel/futex.c55
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/irq/irqdesc.c40
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/latencytop.c23
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/perf_event.c2
-rw-r--r--kernel/power/Kconfig5
-rw-r--r--kernel/power/Makefile1
-rw-r--r--kernel/power/hibernate.c4
-rw-r--r--kernel/power/nvs.c136
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/power/swap.c7
-rw-r--r--kernel/printk.c30
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/smp.c19
-rw-r--r--kernel/softirq.c19
-rw-r--r--kernel/sys.c6
-rw-r--r--kernel/sysctl.c31
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/taskstats.c2
-rw-r--r--kernel/time.c4
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/time/ntp.c425
-rw-r--r--kernel/time/timekeeping.c43
-rw-r--r--kernel/trace/blktrace.c37
-rw-r--r--kernel/trace/trace_entries.h2
-rw-r--r--kernel/user_namespace.c15
33 files changed, 672 insertions, 341 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 5669f71dfdd..353d3fe8ba3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
-obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+obj-$(CONFIG_SMP) += smp.o
ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
diff --git a/kernel/audit.c b/kernel/audit.c
index 77770a034d5..e4956244ae5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,7 +400,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
if (err < 0) {
BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
- audit_log_lost("auditd dissapeared\n");
+ audit_log_lost("auditd disappeared\n");
audit_pid = 0;
/* we might get lucky and get this in the next auditd */
audit_hold_skb(skb);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 51cddc11cd8..b24d7027b83 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -763,9 +763,8 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
* -> cgroup_mkdir.
*/
-static struct dentry *cgroup_lookup(struct inode *dir,
- struct dentry *dentry, struct nameidata *nd);
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
@@ -862,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
iput(inode);
}
+static int cgroup_delete(const struct dentry *d)
+{
+ return 1;
+}
+
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
@@ -912,7 +916,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
list_del_init(&dentry->d_u.d_child);
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
@@ -1451,6 +1455,11 @@ static int cgroup_set_super(struct super_block *sb, void *data)
static int cgroup_get_rootdir(struct super_block *sb)
{
+ static const struct dentry_operations cgroup_dops = {
+ .d_iput = cgroup_diput,
+ .d_delete = cgroup_delete,
+ };
+
struct inode *inode =
cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
struct dentry *dentry;
@@ -1468,6 +1477,8 @@ static int cgroup_get_rootdir(struct super_block *sb)
return -ENOMEM;
}
sb->s_root = dentry;
+ /* for everything else we want ->d_op set */
+ sb->s_d_op = &cgroup_dops;
return 0;
}
@@ -2197,6 +2208,14 @@ static const struct inode_operations cgroup_dir_inode_operations = {
.rename = cgroup_rename,
};
+static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+ if (dentry->d_name.len > NAME_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+ d_add(dentry, NULL);
+ return NULL;
+}
+
/*
* Check if a file is a control file
*/
@@ -2207,26 +2226,6 @@ static inline struct cftype *__file_cft(struct file *file)
return __d_cft(file->f_dentry);
}
-static int cgroup_delete_dentry(const struct dentry *dentry)
-{
- return 1;
-}
-
-static struct dentry *cgroup_lookup(struct inode *dir,
- struct dentry *dentry, struct nameidata *nd)
-{
- static const struct dentry_operations cgroup_dentry_operations = {
- .d_delete = cgroup_delete_dentry,
- .d_iput = cgroup_diput,
- };
-
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_set_d_op(dentry, &cgroup_dentry_operations);
- d_add(dentry, NULL);
- return NULL;
-}
-
static int cgroup_create_file(struct dentry *dentry, mode_t mode,
struct super_block *sb)
{
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index a6e72976682..bd3e8e29caa 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2914,7 +2914,7 @@ static void __init kdb_cmd_init(void)
}
}
-/* Intialize kdb_printf, breakpoint tables and kdb state */
+/* Initialize kdb_printf, breakpoint tables and kdb state */
void __init kdb_init(int lvl)
{
static int kdb_init_lvl = KDB_NOT_INITIALIZED;
diff --git a/kernel/fork.c b/kernel/fork.c
index d9b44f20b6b..25e429152dd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
#include <linux/oom.h>
+#include <linux/khugepaged.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -330,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
retval = ksm_fork(mm, oldmm);
if (retval)
goto out;
+ retval = khugepaged_fork(mm, oldmm);
+ if (retval)
+ goto out;
prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
@@ -529,6 +533,9 @@ void __mmdrop(struct mm_struct *mm)
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ VM_BUG_ON(mm->pmd_huge_pte);
+#endif
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -543,6 +550,7 @@ void mmput(struct mm_struct *mm)
if (atomic_dec_and_test(&mm->mm_users)) {
exit_aio(mm);
ksm_exit(mm);
+ khugepaged_exit(mm); /* must run before exit_mmap */
exit_mmap(mm);
set_mm_exe_file(mm, NULL);
if (!list_empty(&mm->mmlist)) {
@@ -669,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
mm->token_priority = 0;
mm->last_interval = 0;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ mm->pmd_huge_pte = NULL;
+#endif
+
if (!mm_init(mm, tsk))
goto fail_nomem;
@@ -910,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->oom_adj = current->signal->oom_adj;
sig->oom_score_adj = current->signal->oom_score_adj;
+ sig->oom_score_adj_min = current->signal->oom_score_adj_min;
mutex_init(&sig->cred_guard_mutex);
@@ -1410,23 +1423,6 @@ long do_fork(unsigned long clone_flags,
}
/*
- * We hope to recycle these flags after 2.6.26
- */
- if (unlikely(clone_flags & CLONE_STOPPED)) {
- static int __read_mostly count = 100;
-
- if (count > 0 && printk_ratelimit()) {
- char comm[TASK_COMM_LEN];
-
- count--;
- printk(KERN_INFO "fork(): process `%s' used deprecated "
- "clone flags 0x%lx\n",
- get_task_comm(comm, current),
- clone_flags & CLONE_STOPPED);
- }
- }
-
- /*
* When called from kernel_thread, don't do user tracing stuff.
*/
if (likely(user_mode(regs)))
@@ -1464,16 +1460,7 @@ long do_fork(unsigned long clone_flags,
*/
p->flags &= ~PF_STARTING;
- if (unlikely(clone_flags & CLONE_STOPPED)) {
- /*
- * We'll start up with an immediate SIGSTOP.
- */
- sigaddset(&p->pending.signal, SIGSTOP);
- set_tsk_thread_flag(p, TIF_SIGPENDING);
- __set_task_state(p, TASK_STOPPED);
- } else {
- wake_up_new_task(p, clone_flags);
- }
+ wake_up_new_task(p, clone_flags);
tracehook_report_clone_complete(trace, regs,
clone_flags, nr, p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 3019b92e691..52075633373 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
- struct page *page;
+ struct page *page, *page_head;
int err;
/*
@@ -265,11 +265,46 @@ again:
if (err < 0)
return err;
- page = compound_head(page);
- lock_page(page);
- if (!page->mapping) {
- unlock_page(page);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ page_head = page;
+ if (unlikely(PageTail(page))) {
put_page(page);
+ /* serialize against __split_huge_page_splitting() */
+ local_irq_disable();
+ if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+ page_head = compound_head(page);
+ /*
+ * page_head is valid pointer but we must pin
+ * it before taking the PG_lock and/or
+ * PG_compound_lock. The moment we re-enable
+ * irqs __split_huge_page_splitting() can
+ * return and the head page can be freed from
+ * under us. We can't take the PG_lock and/or
+ * PG_compound_lock on a page that could be
+ * freed from under us.
+ */
+ if (page != page_head) {
+ get_page(page_head);
+ put_page(page);
+ }
+ local_irq_enable();
+ } else {
+ local_irq_enable();
+ goto again;
+ }
+ }
+#else
+ page_head = compound_head(page);
+ if (page != page_head) {
+ get_page(page_head);
+ put_page(page);
+ }
+#endif
+
+ lock_page(page_head);
+ if (!page_head->mapping) {
+ unlock_page(page_head);
+ put_page(page_head);
goto again;
}
@@ -280,20 +315,20 @@ again:
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process.
*/
- if (PageAnon(page)) {
+ if (PageAnon(page_head)) {
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm;
key->private.address = address;
} else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = page->mapping->host;
- key->shared.pgoff = page->index;
+ key->shared.inode = page_head->mapping->host;
+ key->shared.pgoff = page_head->index;
}
get_futex_key_refs(key);
- unlock_page(page);
- put_page(page);
+ unlock_page(page_head);
+ put_page(page_head);
return 0;
}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 45da2b6920a..0c8d7c04861 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1745,7 +1745,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
}
/*
- * A NULL parameter means "inifinte"
+ * A NULL parameter means "infinite"
*/
if (!expires) {
schedule();
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9988d03797f..282f20230e6 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; }
static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
{
+ int cpu;
+
desc->irq_data.irq = irq;
desc->irq_data.chip = &no_irq_chip;
desc->irq_data.chip_data = NULL;
@@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
desc->name = NULL;
- memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
desc_smp_init(desc, node);
}
@@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
if (!desc)
return NULL;
/* allocate based on nr_cpu_ids */
- desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
- gfp, node);
+ desc->kstat_irqs = alloc_percpu(unsigned int);
if (!desc->kstat_irqs)
goto err_desc;
@@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
return desc;
err_kstat:
- kfree(desc->kstat_irqs);
+ free_percpu(desc->kstat_irqs);
err_desc:
kfree(desc);
return NULL;
@@ -166,7 +168,7 @@ static void free_desc(unsigned int irq)
mutex_unlock(&sparse_irq_lock);
free_masks(desc);
- kfree(desc->kstat_irqs);
+ free_percpu(desc->kstat_irqs);
kfree(desc);
}
@@ -234,7 +236,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
}
};
-static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
int __init early_irq_init(void)
{
int count, i, node = first_online_node;
@@ -250,7 +251,8 @@ int __init early_irq_init(void)
for (i = 0; i < count; i++) {
desc[i].irq_data.irq = i;
desc[i].irq_data.chip = &no_irq_chip;
- desc[i].kstat_irqs = kstat_irqs_all[i];
+ /* TODO : do this allocation on-demand ... */
+ desc[i].kstat_irqs = alloc_percpu(unsigned int);
alloc_masks(desc + i, GFP_KERNEL, node);
desc_smp_init(desc + i, node);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -275,6 +277,22 @@ static void free_desc(unsigned int irq)
static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
{
+#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
+ struct irq_desc *desc;
+ unsigned int i;
+
+ for (i = 0; i < cnt; i++) {
+ desc = irq_to_desc(start + i);
+ if (desc && !desc->kstat_irqs) {
+ unsigned int __percpu *stats = alloc_percpu(unsigned int);
+
+ if (!stats)
+ return -1;
+ if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
+ free_percpu(stats);
+ }
+ }
+#endif
return start;
}
#endif /* !CONFIG_SPARSE_IRQ */
@@ -391,7 +409,9 @@ void dynamic_irq_cleanup(unsigned int irq)
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
- return desc ? desc->kstat_irqs[cpu] : 0;
+
+ return desc && desc->kstat_irqs ?
+ *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
#ifdef CONFIG_GENERIC_HARDIRQS
@@ -401,10 +421,10 @@ unsigned int kstat_irqs(unsigned int irq)
int cpu;
int sum = 0;
- if (!desc)
+ if (!desc || !desc->kstat_irqs)
return 0;
for_each_possible_cpu(cpu)
- sum += desc->kstat_irqs[cpu];
+ sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
return sum;
}
#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b55045bc756..ec19b92c7eb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -163,7 +163,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
* just verifies it is an address we can use.
*
* Since the kernel does everything in page size chunks ensure
- * the destination addreses are page aligned. Too many
+ * the destination addresses are page aligned. Too many
* special cases crop of when we don't do this. The most
* insidious is getting overlapping destination addresses
* simply because addresses are changed to page size
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 17110a4a4fc..ee74b35e528 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -241,24 +241,19 @@ static int lstats_show(struct seq_file *m, void *v)
seq_puts(m, "Latency Top version : v0.1\n");
for (i = 0; i < MAXLR; i++) {
- if (latency_record[i].backtrace[0]) {
+ struct latency_record *lr = &latency_record[i];
+
+ if (lr->backtrace[0]) {
int q;
- seq_printf(m, "%i %lu %lu ",
- latency_record[i].count,
- latency_record[i].time,
- latency_record[i].max);
+ seq_printf(m, "%i %lu %lu",
+ lr->count, lr->time, lr->max);
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
- char sym[KSYM_SYMBOL_LEN];
- char *c;
- if (!latency_record[i].backtrace[q])
+ unsigned long bt = lr->backtrace[q];
+ if (!bt)
break;
- if (latency_record[i].backtrace[q] == ULONG_MAX)
+ if (bt == ULONG_MAX)
break;
- sprint_symbol(sym, latency_record[i].backtrace[q]);
- c = strchr(sym, '+');
- if (c)
- *c = 0;
- seq_printf(m, "%s ", sym);
+ seq_printf(m, " %ps", (void *)bt);
}
seq_printf(m, "\n");
}
diff --git a/kernel/panic.c b/kernel/panic.c
index 4c13b1a88eb..991bb87a170 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@ static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
int panic_timeout;
+EXPORT_SYMBOL_GPL(panic_timeout);
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index b782b7a79f0..05ebe841270 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4664,7 +4664,7 @@ int perf_swevent_get_recursion_context(void)
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
-void inline perf_swevent_put_recursion_context(int rctx)
+inline void perf_swevent_put_recursion_context(int rctx)
{
struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index a5aff3ebad3..265729966ec 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG
depends on PM_ADVANCED_DEBUG
default n
-config SUSPEND_NVS
- bool
-
config SUSPEND
bool "Suspend to RAM and standby"
depends on PM && ARCH_SUSPEND_POSSIBLE
- select SUSPEND_NVS if HAS_IOMEM
default y
---help---
Allow the system to enter sleep states in which main memory is
@@ -140,7 +136,6 @@ config HIBERNATION
depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
select LZO_COMPRESS
select LZO_DECOMPRESS
- select SUSPEND_NVS if HAS_IOMEM
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index b75597235d8..c350e18b53e 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
block_io.o
-obj-$(CONFIG_SUSPEND_NVS) += nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 870f72bc72a..1832bd26421 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -51,14 +51,14 @@ enum {
static int hibernation_mode = HIBERNATION_SHUTDOWN;
-static struct platform_hibernation_ops *hibernation_ops;
+static const struct platform_hibernation_ops *hibernation_ops;
/**
* hibernation_set_ops - set the global hibernate operations
* @ops: the hibernation operations to use in subsequent hibernation transitions
*/
-void hibernation_set_ops(struct platform_hibernation_ops *ops)
+void hibernation_set_ops(const struct platform_hibernation_ops *ops)
{
if (ops && !(ops->begin && ops->end && ops->pre_snapshot
&& ops->prepare && ops->finish && ops->enter && ops->pre_restore
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c
deleted file mode 100644
index 1836db60bbb..00000000000
--- a/kernel/power/nvs.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
- *
- * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
- *
- * This file is released under the GPLv2.
- */
-
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-
-/*
- * Platforms, like ACPI, may want us to save some memory used by them during
- * suspend and to restore the contents of this memory during the subsequent
- * resume. The code below implements a mechanism allowing us to do that.
- */
-
-struct nvs_page {
- unsigned long phys_start;
- unsigned int size;
- void *kaddr;
- void *data;
- struct list_head node;
-};
-
-static LIST_HEAD(nvs_list);
-
-/**
- * suspend_nvs_register - register platform NVS memory region to save
- * @start - physical address of the region
- * @size - size of the region
- *
- * The NVS region need not be page-aligned (both ends) and we arrange
- * things so that the data from page-aligned addresses in this region will
- * be copied into separate RAM pages.
- */
-int suspend_nvs_register(unsigned long start, unsigned long size)
-{
- struct nvs_page *entry, *next;
-
- while (size > 0) {
- unsigned int nr_bytes;
-
- entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
- if (!entry)
- goto Error;
-
- list_add_tail(&entry->node, &nvs_list);
- entry->phys_start = start;
- nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
- entry->size = (size < nr_bytes) ? size : nr_bytes;
-
- start += entry->size;
- size -= entry->size;
- }
- return 0;
-
- Error:
- list_for_each_entry_safe(entry, next, &nvs_list, node) {
- list_del(&entry->node);
- kfree(entry);
- }
- return -ENOMEM;
-}
-
-/**
- * suspend_nvs_free - free data pages allocated for saving NVS regions
- */
-void suspend_nvs_free(void)
-{
- struct nvs_page *entry;
-
- list_for_each_entry(entry, &nvs_list, node)
- if (entry->data) {
- free_page((unsigned long)entry->data);
- entry->data = NULL;
- if (entry->kaddr) {
- iounmap(entry->kaddr);
- entry->kaddr = NULL;
- }
- }
-}
-
-/**
- * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
- */
-int suspend_nvs_alloc(void)
-{
- struct nvs_page *entry;
-
- list_for_each_entry(entry, &nvs_list, node) {
- entry->data = (void *)__get_free_page(GFP_KERNEL);
- if (!entry->data) {
- suspend_nvs_free();
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-/**
- * suspend_nvs_save - save NVS memory regions
- */
-void suspend_nvs_save(void)
-{
- struct nvs_page *entry;
-
- printk(KERN_INFO "PM: Saving platform NVS memory\n");
-
- list_for_each_entry(entry, &nvs_list, node)
- if (entry->data) {
- entry->kaddr = ioremap(entry->phys_start, entry->size);
- memcpy(entry->data, entry->kaddr, entry->size);
- }
-}
-
-/**
- * suspend_nvs_restore - restore NVS memory regions
- *
- * This function is going to be called with interrupts disabled, so it
- * cannot iounmap the virtual addresses used to access the NVS region.
- */
-void suspend_nvs_restore(void)
-{
- struct nvs_page *entry;
-
- printk(KERN_INFO "PM: Restoring platform NVS memory\n");
-
- list_for_each_entry(entry, &nvs_list, node)
- if (entry->data)
- memcpy(entry->kaddr, entry->data, entry->size);
-}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8850df68794..de6f86bfa30 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -31,13 +31,13 @@ const char *const pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_MEM] = "mem",
};
-static struct platform_suspend_ops *suspend_ops;
+static const struct platform_suspend_ops *suspend_ops;
/**
* suspend_set_ops - Set the global suspend method table.
* @ops: Pointer to ops structure.
*/
-void suspend_set_ops(struct platform_suspend_ops *ops)
+void suspend_set_ops(const struct platform_suspend_ops *ops)
{
mutex_lock(&pm_mutex);
suspend_ops = ops;
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 8c7e4832b9b..7c97c3a0eee 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void)
return res;
root_swap = res;
- res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
if (res)
return res;
@@ -888,7 +888,7 @@ out_finish:
/**
* swsusp_read - read the hibernation image.
* @flags_p: flags passed by the "frozen" kernel in the image header should
- * be written into this memeory location
+ * be written into this memory location
*/
int swsusp_read(unsigned int *flags_p)
@@ -930,7 +930,8 @@ int swsusp_check(void)
{
int error;
- hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+ hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+ FMODE_READ, NULL);
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);
diff --git a/kernel/printk.c b/kernel/printk.c
index f64b8997fc7..53d9a9ec88e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -39,6 +39,7 @@
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <linux/rculist.h>
#include <asm/uaccess.h>
@@ -1502,7 +1503,7 @@ int kmsg_dump_register(struct kmsg_dumper *dumper)
/* Don't allow registering multiple times */
if (!dumper->registered) {
dumper->registered = 1;
- list_add_tail(&dumper->list, &dump_list);
+ list_add_tail_rcu(&dumper->list, &dump_list);
err = 0;
}
spin_unlock_irqrestore(&dump_list_lock, flags);
@@ -1526,29 +1527,16 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper)
spin_lock_irqsave(&dump_list_lock, flags);
if (dumper->registered) {
dumper->registered = 0;
- list_del(&dumper->list);
+ list_del_rcu(&dumper->list);
err = 0;
}
spin_unlock_irqrestore(&dump_list_lock, flags);
+ synchronize_rcu();
return err;
}
EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
-static const char * const kmsg_reasons[] = {
- [KMSG_DUMP_OOPS] = "oops",
- [KMSG_DUMP_PANIC] = "panic",
- [KMSG_DUMP_KEXEC] = "kexec",
-};
-
-static const char *kmsg_to_str(enum kmsg_dump_reason reason)
-{
- if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
- return "unknown";
-
- return kmsg_reasons[reason];
-}
-
/**
* kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
@@ -1587,13 +1575,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
l2 = chars;
}
- if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
- printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n",
- kmsg_to_str(reason));
- return;
- }
- list_for_each_entry(dumper, &dump_list, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(dumper, &dump_list, list)
dumper->dump(dumper, reason, s1, l1, s2, l2);
- spin_unlock_irqrestore(&dump_list_lock, flags);
+ rcu_read_unlock();
}
#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index a0eb0941fa8..ea3e5eff387 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2505,7 +2505,7 @@ out:
* try_to_wake_up_local - try to wake up a local task with rq lock held
* @p: the thread to be awakened
*
- * Put @p on the run-queue if it's not alredy there. The caller must
+ * Put @p on the run-queue if it's not already there. The caller must
* ensure that this_rq() is locked, @p is bound to this_rq() and not
* the current task. this_rq() stays locked over invocation.
*/
diff --git a/kernel/smp.c b/kernel/smp.c
index 12ed8b013e2..4ec30e06998 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -13,6 +13,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static struct {
struct list_head queue;
raw_spinlock_t lock;
@@ -529,3 +530,21 @@ void ipi_call_unlock_irq(void)
{
raw_spin_unlock_irq(&call_function.lock);
}
+#endif /* USE_GENERIC_SMP_HELPERS */
+
+/*
+ * Call a function on all processors
+ */
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
+{
+ int ret = 0;
+
+ preempt_disable();
+ ret = smp_call_function(func, info, wait);
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ preempt_enable();
+ return ret;
+}
+EXPORT_SYMBOL(on_each_cpu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0823778f87f..68eb5efec38 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -885,25 +885,6 @@ static __init int spawn_ksoftirqd(void)
}
early_initcall(spawn_ksoftirqd);
-#ifdef CONFIG_SMP
-/*
- * Call a function on all processors
- */
-int on_each_cpu(void (*func) (void *info), void *info, int wait)
-{
- int ret = 0;
-
- preempt_disable();
- ret = smp_call_function(func, info, wait);
- local_irq_disable();
- func(info);
- local_irq_enable();
- preempt_enable();
- return ret;
-}
-EXPORT_SYMBOL(on_each_cpu);
-#endif
-
/*
* [ These __weak aliases are kept in a separate compilation unit, so that
* GCC does not inline them incorrectly. ]
diff --git a/kernel/sys.c b/kernel/sys.c
index 2745dcdb6c6..31b71a276b4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -43,6 +43,8 @@
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/kmsg_dump.h>
+
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>
@@ -285,6 +287,7 @@ out_unlock:
*/
void emergency_restart(void)
{
+ kmsg_dump(KMSG_DUMP_EMERG);
machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);
@@ -312,6 +315,7 @@ void kernel_restart(char *cmd)
printk(KERN_EMERG "Restarting system.\n");
else
printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
+ kmsg_dump(KMSG_DUMP_RESTART);
machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);
@@ -333,6 +337,7 @@ void kernel_halt(void)
kernel_shutdown_prepare(SYSTEM_HALT);
sysdev_shutdown();
printk(KERN_EMERG "System halted.\n");
+ kmsg_dump(KMSG_DUMP_HALT);
machine_halt();
}
@@ -351,6 +356,7 @@ void kernel_power_off(void)
disable_nonboot_cpus();
sysdev_shutdown();
printk(KERN_EMERG "Power down.\n");
+ kmsg_dump(KMSG_DUMP_POWEROFF);
machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ae5cbb1e3ce..bc86bb32e12 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/signal.h>
+#include <linux/printk.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/ctype.h>
@@ -245,10 +246,6 @@ static struct ctl_table root_table[] = {
.mode = 0555,
.child = dev_table,
},
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -710,6 +707,15 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "kptr_restrict",
+ .data = &kptr_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
#endif
{
.procname = "ngroups_max",
@@ -962,10 +968,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -1326,11 +1328,6 @@ static struct ctl_table vm_table[] = {
.extra2 = &one,
},
#endif
-
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -1486,10 +1483,6 @@ static struct ctl_table fs_table[] = {
.proc_handler = &pipe_proc_fn,
.extra1 = &pipe_min_size,
},
-/*
- * NOTE: do not add new entries to this table unless you have read
- * Documentation/sysctl/ctl_unnumbered.txt
- */
{ }
};
@@ -2899,7 +2892,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
}
}
-#else /* CONFIG_PROC_FS */
+#else /* CONFIG_PROC_SYSCTL */
int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2951,7 +2944,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
}
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_PROC_SYSCTL */
/*
* No sense putting this after each symbol definition, twice,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 4b2545a136f..b875bedf7c9 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1192,7 +1192,7 @@ static ssize_t bin_dn_node_address(struct file *file,
buf[result] = '\0';
- /* Convert the decnet addresss to binary */
+ /* Convert the decnet address to binary */
result = -EIO;
nodep = strchr(buf, '.') + 1;
if (!nodep)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 69691eb4b71..3971c6b9d58 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -348,7 +348,7 @@ static int parse(struct nlattr *na, struct cpumask *mask)
return ret;
}
-#ifdef CONFIG_IA64
+#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
#define TASKSTATS_NEEDS_PADDING 1
#endif
diff --git a/kernel/time.c b/kernel/time.c
index ba9b338d183..32174359576 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(current_fs_time);
* Avoid unnecessary multiplications/divisions in the
* two most common HZ cases:
*/
-unsigned int inline jiffies_to_msecs(const unsigned long j)
+inline unsigned int jiffies_to_msecs(const unsigned long j)
{
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
return (MSEC_PER_SEC / HZ) * j;
@@ -254,7 +254,7 @@ unsigned int inline jiffies_to_msecs(const unsigned long j)
}
EXPORT_SYMBOL(jiffies_to_msecs);
-unsigned int inline jiffies_to_usecs(const unsigned long j)
+inline unsigned int jiffies_to_usecs(const unsigned long j)
{
#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
return (USEC_PER_SEC / HZ) * j;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index df140cd3ea4..c50a034de30 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -679,7 +679,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
- /* Intialize mult/shift and max_idle_ns */
+ /* Initialize mult/shift and max_idle_ns */
__clocksource_updatefreq_scale(cs, scale, freq);
/* Add clocksource to the clcoksource list */
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index d2321891538..5c00242fa92 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -14,6 +14,7 @@
#include <linux/timex.h>
#include <linux/time.h>
#include <linux/mm.h>
+#include <linux/module.h>
/*
* NTP timekeeping variables:
@@ -74,6 +75,162 @@ static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
+#ifdef CONFIG_NTP_PPS
+
+/*
+ * The following variables are used when a pulse-per-second (PPS) signal
+ * is available. They establish the engineering parameters of the clock
+ * discipline loop when controlled by the PPS signal.
+ */
+#define PPS_VALID 10 /* PPS signal watchdog max (s) */
+#define PPS_POPCORN 4 /* popcorn spike threshold (shift) */
+#define PPS_INTMIN 2 /* min freq interval (s) (shift) */
+#define PPS_INTMAX 8 /* max freq interval (s) (shift) */
+#define PPS_INTCOUNT 4 /* number of consecutive good intervals to
+ increase pps_shift or consecutive bad
+ intervals to decrease it */
+#define PPS_MAXWANDER 100000 /* max PPS freq wander (ns/s) */
+
+static int pps_valid; /* signal watchdog counter */
+static long pps_tf[3]; /* phase median filter */
+static long pps_jitter; /* current jitter (ns) */
+static struct timespec pps_fbase; /* beginning of the last freq interval */
+static int pps_shift; /* current interval duration (s) (shift) */
+static int pps_intcnt; /* interval counter */
+static s64 pps_freq; /* frequency offset (scaled ns/s) */
+static long pps_stabil; /* current stability (scaled ns/s) */
+
+/*
+ * PPS signal quality monitors
+ */
+static long pps_calcnt; /* calibration intervals */
+static long pps_jitcnt; /* jitter limit exceeded */
+static long pps_stbcnt; /* stability limit exceeded */
+static long pps_errcnt; /* calibration errors */
+
+
+/* PPS kernel consumer compensates the whole phase error immediately.
+ * Otherwise, reduce the offset by a fixed factor times the time constant.
+ */
+static inline s64 ntp_offset_chunk(s64 offset)
+{
+ if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
+ return offset;
+ else
+ return shift_right(offset, SHIFT_PLL + time_constant);
+}
+
+static inline void pps_reset_freq_interval(void)
+{
+ /* the PPS calibration interval may end
+ surprisingly early */
+ pps_shift = PPS_INTMIN;
+ pps_intcnt = 0;
+}
+
+/**
+ * pps_clear - Clears the PPS state variables
+ *
+ * Must be called while holding a write on the xtime_lock
+ */
+static inline void pps_clear(void)
+{
+ pps_reset_freq_interval();
+ pps_tf[0] = 0;
+ pps_tf[1] = 0;
+ pps_tf[2] = 0;
+ pps_fbase.tv_sec = pps_fbase.tv_nsec = 0;
+ pps_freq = 0;
+}
+
+/* Decrease pps_valid to indicate that another second has passed since
+ * the last PPS signal. When it reaches 0, indicate that PPS signal is
+ * missing.
+ *
+ * Must be called while holding a write on the xtime_lock
+ */
+static inline void pps_dec_valid(void)
+{
+ if (pps_valid > 0)
+ pps_valid--;
+ else {
+ time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
+ STA_PPSWANDER | STA_PPSERROR);
+ pps_clear();
+ }
+}
+
+static inline void pps_set_freq(s64 freq)
+{
+ pps_freq = freq;
+}
+
+static inline int is_error_status(int status)
+{
+ return (time_status & (STA_UNSYNC|STA_CLOCKERR))
+ /* PPS signal lost when either PPS time or
+ * PPS frequency synchronization requested
+ */
+ || ((time_status & (STA_PPSFREQ|STA_PPSTIME))
+ && !(time_status & STA_PPSSIGNAL))
+ /* PPS jitter exceeded when
+ * PPS time synchronization requested */
+ || ((time_status & (STA_PPSTIME|STA_PPSJITTER))
+ == (STA_PPSTIME|STA_PPSJITTER))
+ /* PPS wander exceeded or calibration error when
+ * PPS frequency synchronization requested
+ */
+ || ((time_status & STA_PPSFREQ)
+ && (time_status & (STA_PPSWANDER|STA_PPSERROR)));
+}
+
+static inline void pps_fill_timex(struct timex *txc)
+{
+ txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
+ PPM_SCALE_INV, NTP_SCALE_SHIFT);
+ txc->jitter = pps_jitter;
+ if (!(time_status & STA_NANO))
+ txc->jitter /= NSEC_PER_USEC;
+ txc->shift = pps_shift;
+ txc->stabil = pps_stabil;
+ txc->jitcnt = pps_jitcnt;
+ txc->calcnt = pps_calcnt;
+ txc->errcnt = pps_errcnt;
+ txc->stbcnt = pps_stbcnt;
+}
+
+#else /* !CONFIG_NTP_PPS */
+
+static inline s64 ntp_offset_chunk(s64 offset)
+{
+ return shift_right(offset, SHIFT_PLL + time_constant);
+}
+
+static inline void pps_reset_freq_interval(void) {}
+static inline void pps_clear(void) {}
+static inline void pps_dec_valid(void) {}
+static inline void pps_set_freq(s64 freq) {}
+
+static inline int is_error_status(int status)
+{
+ return status & (STA_UNSYNC|STA_CLOCKERR);
+}
+
+static inline void pps_fill_timex(struct timex *txc)
+{
+ /* PPS is not implemented, so these are zero */
+ txc->ppsfreq = 0;
+ txc->jitter = 0;
+ txc->shift = 0;
+ txc->stabil = 0;
+ txc->jitcnt = 0;
+ txc->calcnt = 0;
+ txc->errcnt = 0;
+ txc->stbcnt = 0;
+}
+
+#endif /* CONFIG_NTP_PPS */
+
/*
* NTP methods:
*/
@@ -185,6 +342,9 @@ void ntp_clear(void)
tick_length = tick_length_base;
time_offset = 0;
+
+ /* Clear PPS state variables */
+ pps_clear();
}
/*
@@ -250,16 +410,16 @@ void second_overflow(void)
time_status |= STA_UNSYNC;
}
- /*
- * Compute the phase adjustment for the next second. The offset is
- * reduced by a fixed factor times the time constant.
- */
+ /* Compute the phase adjustment for the next second */
tick_length = tick_length_base;
- delta = shift_right(time_offset, SHIFT_PLL + time_constant);
+ delta = ntp_offset_chunk(time_offset);
time_offset -= delta;
tick_length += delta;
+ /* Check PPS signal */
+ pps_dec_valid();
+
if (!time_adjust)
return;
@@ -369,6 +529,8 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
+ /* restart PPS frequency calibration */
+ pps_reset_freq_interval();
}
/*
@@ -418,6 +580,8 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
time_freq = txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
+ /* update pps_freq */
+ pps_set_freq(time_freq);
}
if (txc->modes & ADJ_MAXERROR)
@@ -508,7 +672,8 @@ int do_adjtimex(struct timex *txc)
}
result = time_state; /* mostly `TIME_OK' */
- if (time_status & (STA_UNSYNC|STA_CLOCKERR))
+ /* check for errors */
+ if (is_error_status(time_status))
result = TIME_ERROR;
txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
@@ -522,15 +687,8 @@ int do_adjtimex(struct timex *txc)
txc->tick = tick_usec;
txc->tai = time_tai;
- /* PPS is not implemented, so these are zero */
- txc->ppsfreq = 0;
- txc->jitter = 0;
- txc->shift = 0;
- txc->stabil = 0;
- txc->jitcnt = 0;
- txc->calcnt = 0;
- txc->errcnt = 0;
- txc->stbcnt = 0;
+ /* fill PPS status fields */
+ pps_fill_timex(txc);
write_sequnlock_irq(&xtime_lock);
@@ -544,6 +702,243 @@ int do_adjtimex(struct timex *txc)
return result;
}
+#ifdef CONFIG_NTP_PPS
+
+/* actually struct pps_normtime is good old struct timespec, but it is
+ * semantically different (and it is the reason why it was invented):
+ * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
+ * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
+struct pps_normtime {
+ __kernel_time_t sec; /* seconds */
+ long nsec; /* nanoseconds */
+};
+
+/* normalize the timestamp so that nsec is in the
+ ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
+static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
+{
+ struct pps_normtime norm = {
+ .sec = ts.tv_sec,
+ .nsec = ts.tv_nsec
+ };
+
+ if (norm.nsec > (NSEC_PER_SEC >> 1)) {
+ norm.nsec -= NSEC_PER_SEC;
+ norm.sec++;
+ }
+
+ return norm;
+}
+
+/* get current phase correction and jitter */
+static inline long pps_phase_filter_get(long *jitter)
+{
+ *jitter = pps_tf[0] - pps_tf[1];
+ if (*jitter < 0)
+ *jitter = -*jitter;
+
+ /* TODO: test various filters */
+ return pps_tf[0];
+}
+
+/* add the sample to the phase filter */
+static inline void pps_phase_filter_add(long err)
+{
+ pps_tf[2] = pps_tf[1];
+ pps_tf[1] = pps_tf[0];
+ pps_tf[0] = err;
+}
+
+/* decrease frequency calibration interval length.
+ * It is halved after four consecutive unstable intervals.
+ */
+static inline void pps_dec_freq_interval(void)
+{
+ if (--pps_intcnt <= -PPS_INTCOUNT) {
+ pps_intcnt = -PPS_INTCOUNT;
+ if (pps_shift > PPS_INTMIN) {
+ pps_shift--;
+ pps_intcnt = 0;
+ }
+ }
+}
+
+/* increase frequency calibration interval length.
+ * It is doubled after four consecutive stable intervals.
+ */
+static inline void pps_inc_freq_interval(void)
+{
+ if (++pps_intcnt >= PPS_INTCOUNT) {
+ pps_intcnt = PPS_INTCOUNT;
+ if (pps_shift < PPS_INTMAX) {
+ pps_shift++;
+ pps_intcnt = 0;
+ }
+ }
+}
+
+/* update clock frequency based on MONOTONIC_RAW clock PPS signal
+ * timestamps
+ *
+ * At the end of the calibration interval the difference between the
+ * first and last MONOTONIC_RAW clock timestamps divided by the length
+ * of the interval becomes the frequency update. If the interval was
+ * too long, the data are discarded.
+ * Returns the difference between old and new frequency values.
+ */
+static long hardpps_update_freq(struct pps_normtime freq_norm)
+{
+ long delta, delta_mod;
+ s64 ftemp;
+
+ /* check if the frequency interval was too long */
+ if (freq_norm.sec > (2 << pps_shift)) {
+ time_status |= STA_PPSERROR;
+ pps_errcnt++;
+ pps_dec_freq_interval();
+ pr_err("hardpps: PPSERROR: interval too long - %ld s\n",
+ freq_norm.sec);
+ return 0;
+ }
+
+ /* here the raw frequency offset and wander (stability) is
+ * calculated. If the wander is less than the wander threshold
+ * the interval is increased; otherwise it is decreased.
+ */
+ ftemp = div_s64(((s64)(-freq_norm.nsec)) << NTP_SCALE_SHIFT,
+ freq_norm.sec);
+ delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
+ pps_freq = ftemp;
+ if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
+ pr_warning("hardpps: PPSWANDER: change=%ld\n", delta);
+ time_status |= STA_PPSWANDER;
+ pps_stbcnt++;
+ pps_dec_freq_interval();
+ } else { /* good sample */
+ pps_inc_freq_interval();
+ }
+
+ /* the stability metric is calculated as the average of recent
+ * frequency changes, but is used only for performance
+ * monitoring
+ */
+ delta_mod = delta;
+ if (delta_mod < 0)
+ delta_mod = -delta_mod;
+ pps_stabil += (div_s64(((s64)delta_mod) <<
+ (NTP_SCALE_SHIFT - SHIFT_USEC),
+ NSEC_PER_USEC) - pps_stabil) >> PPS_INTMIN;
+
+ /* if enabled, the system clock frequency is updated */
+ if ((time_status & STA_PPSFREQ) != 0 &&
+ (time_status & STA_FREQHOLD) == 0) {
+ time_freq = pps_freq;
+ ntp_update_frequency();
+ }
+
+ return delta;
+}
+
+/* correct REALTIME clock phase error against PPS signal */
+static void hardpps_update_phase(long error)
+{
+ long correction = -error;
+ long jitter;
+
+ /* add the sample to the median filter */
+ pps_phase_filter_add(correction);
+ correction = pps_phase_filter_get(&jitter);
+
+ /* Nominal jitter is due to PPS signal noise. If it exceeds the
+ * threshold, the sample is discarded; otherwise, if so enabled,
+ * the time offset is updated.
+ */
+ if (jitter > (pps_jitter << PPS_POPCORN)) {
+ pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
+ jitter, (pps_jitter << PPS_POPCORN));
+ time_status |= STA_PPSJITTER;
+ pps_jitcnt++;
+ } else if (time_status & STA_PPSTIME) {
+ /* correct the time using the phase offset */
+ time_offset = div_s64(((s64)correction) << NTP_SCALE_SHIFT,
+ NTP_INTERVAL_FREQ);
+ /* cancel running adjtime() */
+ time_adjust = 0;
+ }
+ /* update jitter */
+ pps_jitter += (jitter - pps_jitter) >> PPS_INTMIN;
+}
+
+/*
+ * hardpps() - discipline CPU clock oscillator to external PPS signal
+ *
+ * This routine is called at each PPS signal arrival in order to
+ * discipline the CPU clock oscillator to the PPS signal. It takes two
+ * parameters: REALTIME and MONOTONIC_RAW clock timestamps. The former
+ * is used to correct clock phase error and the latter is used to
+ * correct the frequency.
+ *
+ * This code is based on David Mills's reference nanokernel
+ * implementation. It was mostly rewritten but keeps the same idea.
+ */
+void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+{
+ struct pps_normtime pts_norm, freq_norm;
+ unsigned long flags;
+
+ pts_norm = pps_normalize_ts(*phase_ts);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+
+ /* clear the error bits, they will be set again if needed */
+ time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
+
+ /* indicate signal presence */
+ time_status |= STA_PPSSIGNAL;
+ pps_valid = PPS_VALID;
+
+ /* when called for the first time,
+ * just start the frequency interval */
+ if (unlikely(pps_fbase.tv_sec == 0)) {
+ pps_fbase = *raw_ts;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ return;
+ }
+
+ /* ok, now we have a base for frequency calculation */
+ freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
+
+ /* check that the signal is in the range
+ * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
+ if ((freq_norm.sec == 0) ||
+ (freq_norm.nsec > MAXFREQ * freq_norm.sec) ||
+ (freq_norm.nsec < -MAXFREQ * freq_norm.sec)) {
+ time_status |= STA_PPSJITTER;
+ /* restart the frequency calibration interval */
+ pps_fbase = *raw_ts;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ pr_err("hardpps: PPSJITTER: bad pulse\n");
+ return;
+ }
+
+ /* signal is ok */
+
+ /* check if the current frequency interval is finished */
+ if (freq_norm.sec >= (1 << pps_shift)) {
+ pps_calcnt++;
+ /* restart the frequency calibration interval */
+ pps_fbase = *raw_ts;
+ hardpps_update_freq(freq_norm);
+ }
+
+ hardpps_update_phase(pts_norm.nsec);
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+EXPORT_SYMBOL(hardpps);
+
+#endif /* CONFIG_NTP_PPS */
+
static int __init ntp_tick_adj_setup(char *str)
{
ntp_tick_adj = simple_strtol(str, NULL, 0);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5bb86da8200..5536aaf3ba3 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -288,6 +288,49 @@ void ktime_get_ts(struct timespec *ts)
}
EXPORT_SYMBOL_GPL(ktime_get_ts);
+#ifdef CONFIG_NTP_PPS
+
+/**
+ * getnstime_raw_and_real - get day and raw monotonic time in timespec format
+ * @ts_raw: pointer to the timespec to be set to raw monotonic time
+ * @ts_real: pointer to the timespec to be set to the time of day
+ *
+ * This function reads both the time of day and raw monotonic time at the
+ * same time atomically and stores the resulting timestamps in timespec
+ * format.
+ */
+void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
+{
+ unsigned long seq;
+ s64 nsecs_raw, nsecs_real;
+
+ WARN_ON_ONCE(timekeeping_suspended);
+
+ do {
+ u32 arch_offset;
+
+ seq = read_seqbegin(&xtime_lock);
+
+ *ts_raw = raw_time;
+ *ts_real = xtime;
+
+ nsecs_raw = timekeeping_get_ns_raw();
+ nsecs_real = timekeeping_get_ns();
+
+ /* If arch requires, add in gettimeoffset() */
+ arch_offset = arch_gettimeoffset();
+ nsecs_raw += arch_offset;
+ nsecs_real += arch_offset;
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ timespec_add_ns(ts_raw, nsecs_raw);
+ timespec_add_ns(ts_real, nsecs_real);
+}
+EXPORT_SYMBOL(getnstime_raw_and_real);
+
+#endif /* CONFIG_NTP_PPS */
+
/**
* do_gettimeofday - Returns the time of day in a timeval
* @tv: pointer to the timeval to be set
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7b8ec028154..153562d0b93 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -758,53 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore,
* @q: queue the io is for
* @bio: the source bio
* @what: the action
+ * @error: error, if any
*
* Description:
* Records an action against a bio. Will log the bio offset + size.
*
**/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
- u32 what)
+ u32 what, int error)
{
struct blk_trace *bt = q->blk_trace;
if (likely(!bt))
return;
+ if (!error && !bio_flagged(bio, BIO_UPTODATE))
+ error = EIO;
+
__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
- !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+ error, 0, NULL);
}
static void blk_add_trace_bio_bounce(void *ignore,
struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
+ blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
}
static void blk_add_trace_bio_complete(void *ignore,
- struct request_queue *q, struct bio *bio)
+ struct request_queue *q, struct bio *bio,
+ int error)
{
- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
}
static void blk_add_trace_bio_backmerge(void *ignore,
struct request_queue *q,
struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
}
static void blk_add_trace_bio_frontmerge(void *ignore,
struct request_queue *q,
struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
}
static void blk_add_trace_bio_queue(void *ignore,
struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
}
static void blk_add_trace_getrq(void *ignore,
@@ -812,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore,
struct bio *bio, int rw)
{
if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
+ blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
else {
struct blk_trace *bt = q->blk_trace;
@@ -827,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore,
struct bio *bio, int rw)
{
if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
+ blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
else {
struct blk_trace *bt = q->blk_trace;
@@ -887,7 +892,7 @@ static void blk_add_trace_split(void *ignore,
}
/**
- * blk_add_trace_remap - Add a trace for a remap operation
+ * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
* @ignore: trace callback data parameter (not used)
* @q: queue the io is for
* @bio: the source bio
@@ -899,9 +904,9 @@ static void blk_add_trace_split(void *ignore,
* it spans a stripe (or similar). Add a trace for that action.
*
**/
-static void blk_add_trace_remap(void *ignore,
- struct request_queue *q, struct bio *bio,
- dev_t dev, sector_t from)
+static void blk_add_trace_bio_remap(void *ignore,
+ struct request_queue *q, struct bio *bio,
+ dev_t dev, sector_t from)
{
struct blk_trace *bt = q->blk_trace;
struct blk_io_trace_remap r;
@@ -1016,7 +1021,7 @@ static void blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_split(blk_add_trace_split, NULL);
WARN_ON(ret);
- ret = register_trace_block_remap(blk_add_trace_remap, NULL);
+ ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
WARN_ON(ret);
ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
WARN_ON(ret);
@@ -1025,7 +1030,7 @@ static void blk_register_tracepoints(void)
static void blk_unregister_tracepoints(void)
{
unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
- unregister_trace_block_remap(blk_add_trace_remap, NULL);
+ unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e3dfecaf13e..6cf223764be 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -53,7 +53,7 @@
*/
/*
- * Function trace entry - function address and parent function addres:
+ * Function trace entry - function address and parent function address:
*/
FTRACE_ENTRY(function, ftrace_entry,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 25915832291..9da289c34f2 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -12,6 +12,8 @@
#include <linux/highuid.h>
#include <linux/cred.h>
+static struct kmem_cache *user_ns_cachep __read_mostly;
+
/*
* Create a new user namespace, deriving the creator from the user in the
* passed credentials, and replacing that user with the new root user for the
@@ -26,7 +28,7 @@ int create_user_ns(struct cred *new)
struct user_struct *root_user;
int n;
- ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
+ ns = kmem_cache_alloc(user_ns_cachep, GFP_KERNEL);
if (!ns)
return -ENOMEM;
@@ -38,7 +40,7 @@ int create_user_ns(struct cred *new)
/* Alloc new root user. */
root_user = alloc_uid(ns, 0);
if (!root_user) {
- kfree(ns);
+ kmem_cache_free(user_ns_cachep, ns);
return -ENOMEM;
}
@@ -71,7 +73,7 @@ static void free_user_ns_work(struct work_struct *work)
struct user_namespace *ns =
container_of(work, struct user_namespace, destroyer);
free_uid(ns->creator);
- kfree(ns);
+ kmem_cache_free(user_ns_cachep, ns);
}
void free_user_ns(struct kref *kref)
@@ -126,3 +128,10 @@ gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t
/* No useful relationship so no mapping */
return overflowgid;
}
+
+static __init int user_namespaces_init(void)
+{
+ user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
+ return 0;
+}
+module_init(user_namespaces_init);