summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c21
-rw-r--r--kernel/audit_tree.c19
-rw-r--r--kernel/cpu.c2
-rw-r--r--kernel/debug/kdb/kdb_debugger.c4
-rw-r--r--kernel/debug/kdb/kdb_io.c11
-rw-r--r--kernel/debug/kdb/kdb_main.c15
-rw-r--r--kernel/events/callchain.c9
-rw-r--r--kernel/events/core.c30
-rw-r--r--kernel/events/internal.h3
-rw-r--r--kernel/events/uprobes.c213
-rw-r--r--kernel/fork.c46
-rw-r--r--kernel/futex.c17
-rw-r--r--kernel/irq/handle.c7
-rw-r--r--kernel/irq/irqdomain.c362
-rw-r--r--kernel/irq/manage.c32
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kmod.c37
-rw-r--r--kernel/panic.c8
-rw-r--r--kernel/printk.c34
-rw-r--r--kernel/resource.c24
-rw-r--r--kernel/sched/core.c41
-rw-r--r--kernel/sched/cpupri.c10
-rw-r--r--kernel/sched/fair.c40
-rw-r--r--kernel/sched/rt.c13
-rw-r--r--kernel/sched/sched.h8
-rw-r--r--kernel/sched/stop_task.c22
-rw-r--r--kernel/softirq.c9
-rw-r--r--kernel/sys.c57
-rw-r--r--kernel/sysctl.c69
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/task_work.c1
-rw-r--r--kernel/taskstats.c5
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/timekeeping.c407
-rw-r--r--kernel/timer.c9
-rw-r--r--kernel/trace/trace_event_perf.c2
-rw-r--r--kernel/trace/trace_kprobe.c6
-rw-r--r--kernel/trace/trace_syscalls.c4
-rw-r--r--kernel/trace/trace_uprobe.c2
40 files changed, 945 insertions, 662 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 4a3f28d2ca6..ea3b7b6191c 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1456,6 +1456,27 @@ void audit_log_key(struct audit_buffer *ab, char *key)
}
/**
+ * audit_log_link_denied - report a link restriction denial
+ * @operation: specific link opreation
+ * @link: the path that triggered the restriction
+ */
+void audit_log_link_denied(const char *operation, struct path *link)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(current->audit_context, GFP_KERNEL,
+ AUDIT_ANOM_LINK);
+ audit_log_format(ab, "op=%s action=denied", operation);
+ audit_log_format(ab, " pid=%d comm=", current->pid);
+ audit_log_untrustedstring(ab, current->comm);
+ audit_log_d_path(ab, " path=", link);
+ audit_log_format(ab, " dev=");
+ audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id);
+ audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino);
+ audit_log_end(ab);
+}
+
+/**
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca582ba1..ed206fd88cc 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -250,7 +250,6 @@ static void untag_chunk(struct node *p)
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
- fsnotify_put_mark(entry);
goto out;
}
@@ -259,7 +258,7 @@ static void untag_chunk(struct node *p)
fsnotify_duplicate_mark(&new->mark, entry);
if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
- free_chunk(new);
+ fsnotify_put_mark(&new->mark);
goto Fallback;
}
@@ -293,7 +292,7 @@ static void untag_chunk(struct node *p)
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(&new->mark); /* drop initial reference */
goto out;
Fallback:
@@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
entry = &chunk->mark;
if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
- free_chunk(chunk);
+ fsnotify_put_mark(entry);
return -ENOSPC;
}
@@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
insert_hash(chunk);
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
+ fsnotify_put_mark(entry); /* drop initial reference */
return 0;
}
@@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
fsnotify_duplicate_mark(chunk_entry, old_entry);
if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
spin_unlock(&old_entry->lock);
- free_chunk(chunk);
+ fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
return -ENOSPC;
}
@@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
fsnotify_destroy_mark(old_entry);
+ fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
- fsnotify_put_mark(old_entry); /* and kill it */
return 0;
}
@@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
evict_chunk(chunk);
- fsnotify_put_mark(entry);
+
+ /*
+ * We are guaranteed to have at least one reference to the mark from
+ * either the inode or the caller of fsnotify_destroy_mark().
+ */
+ BUG_ON(atomic_read(&entry->refcnt) < 1);
}
static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a4eb5227a19..14d32588ccc 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -416,7 +416,7 @@ int __cpuinit cpu_up(unsigned int cpu)
if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
mutex_lock(&zonelists_mutex);
- build_all_zonelists(NULL);
+ build_all_zonelists(NULL, NULL);
mutex_unlock(&zonelists_mutex);
}
#endif
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index 8b68ce78ff1..be7b33b73d3 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -12,6 +12,7 @@
#include <linux/kdb.h>
#include <linux/kdebug.h>
#include <linux/export.h>
+#include <linux/hardirq.h>
#include "kdb_private.h"
#include "../debug_core.h"
@@ -52,6 +53,9 @@ int kdb_stub(struct kgdb_state *ks)
if (atomic_read(&kgdb_setting_breakpoint))
reason = KDB_REASON_KEYBOARD;
+ if (in_nmi())
+ reason = KDB_REASON_NMI;
+
for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
if ((bp->bp_enabled) && (bp->bp_addr == addr)) {
reason = KDB_REASON_BREAK;
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index bb9520f0f6f..0a69d2adc4f 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -715,9 +715,6 @@ kdb_printit:
/* check for having reached the LINES number of printed lines */
if (kdb_nextline == linecount) {
char buf1[16] = "";
-#if defined(CONFIG_SMP)
- char buf2[32];
-#endif
/* Watch out for recursion here. Any routine that calls
* kdb_printf will come back through here. And kdb_read
@@ -732,14 +729,6 @@ kdb_printit:
if (moreprompt == NULL)
moreprompt = "more> ";
-#if defined(CONFIG_SMP)
- if (strchr(moreprompt, '%')) {
- sprintf(buf2, moreprompt, get_cpu());
- put_cpu();
- moreprompt = buf2;
- }
-#endif
-
kdb_input_flush();
c = console_drivers;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1f91413edb8..31df1706b9a 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -139,11 +139,10 @@ static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
static char *__env[] = {
#if defined(CONFIG_SMP)
"PROMPT=[%d]kdb> ",
- "MOREPROMPT=[%d]more> ",
#else
"PROMPT=kdb> ",
- "MOREPROMPT=more> ",
#endif
+ "MOREPROMPT=more> ",
"RADIX=16",
"MDCOUNT=8", /* lines of md output */
KDB_PLATFORM_ENV,
@@ -1236,18 +1235,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
*cmdbuf = '\0';
*(cmd_hist[cmd_head]) = '\0';
- if (KDB_FLAG(ONLY_DO_DUMP)) {
- /* kdb is off but a catastrophic error requires a dump.
- * Take the dump and reboot.
- * Turn on logging so the kdb output appears in the log
- * buffer in the dump.
- */
- const char *setargs[] = { "set", "LOGGING", "1" };
- kdb_set(2, setargs);
- kdb_reboot(0, NULL);
- /*NOTREACHED*/
- }
-
do_full_getstr:
#if defined(CONFIG_SMP)
snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 6581a040f39..98d4597f43d 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -153,7 +153,8 @@ put_callchain_entry(int rctx)
put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
}
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs)
{
int rctx;
struct perf_callchain_entry *entry;
@@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
}
if (regs) {
+ /*
+ * Disallow cross-task user callchains.
+ */
+ if (event->ctx->task && event->ctx->task != current)
+ goto exit_put;
+
perf_callchain_store(entry, PERF_CONTEXT_USER);
perf_callchain_user(entry, regs);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f1cf0edeb39..b7935fcec7d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
- data->callchain = perf_callchain(regs);
+ data->callchain = perf_callchain(event, regs);
if (data->callchain)
size += data->callchain->nr;
@@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event,
}
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
- struct pt_regs *regs, struct hlist_head *head, int rctx)
+ struct pt_regs *regs, struct hlist_head *head, int rctx,
+ struct task_struct *task)
{
struct perf_sample_data data;
struct perf_event *event;
@@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
perf_swevent_event(event, count, &data, regs);
}
+ /*
+ * If we got specified a target task, also iterate its context and
+ * deliver this event there too.
+ */
+ if (task && task != current) {
+ struct perf_event_context *ctx;
+ struct trace_entry *entry = record;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
+ if (!ctx)
+ goto unlock;
+
+ list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ if (event->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+ if (event->attr.config != entry->type)
+ continue;
+ if (perf_tp_event_match(event, &data, regs))
+ perf_swevent_event(event, count, &data, regs);
+ }
+unlock:
+ rcu_read_unlock();
+ }
+
perf_swevent_put_recursion_context(rctx);
}
EXPORT_SYMBOL_GPL(perf_tp_event);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index b0b107f90af..a096c19f2c2 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle,
}
/* Callchain handling */
-extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+extern struct perf_callchain_entry *
+perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f93532748bc..c08a22d02f7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -32,6 +32,7 @@
#include <linux/swap.h> /* try_to_free_swap */
#include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */
+#include "../../mm/internal.h" /* munlock_vma_page */
#include <linux/uprobes.h>
@@ -112,14 +113,14 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
return false;
}
-static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
+static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
{
- loff_t vaddr;
-
- vaddr = vma->vm_start + offset;
- vaddr -= vma->vm_pgoff << PAGE_SHIFT;
+ return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+}
- return vaddr;
+static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
+{
+ return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
}
/**
@@ -127,25 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
* based on replace_page in mm/ksm.c
*
* @vma: vma that holds the pte pointing to page
+ * @addr: address the old @page is mapped at
* @page: the cowed page we are replacing by kpage
* @kpage: the modified page we replace page by
*
* Returns 0 on success, -EFAULT on failure.
*/
-static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+ struct page *page, struct page *kpage)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long addr;
spinlock_t *ptl;
pte_t *ptep;
+ int err;
- addr = page_address_in_vma(page, vma);
- if (addr == -EFAULT)
- return -EFAULT;
+ /* For try_to_free_swap() and munlock_vma_page() below */
+ lock_page(page);
+ err = -EAGAIN;
ptep = page_check_address(page, mm, addr, &ptl, 0);
if (!ptep)
- return -EAGAIN;
+ goto unlock;
get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
@@ -162,10 +165,16 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
page_remove_rmap(page);
if (!page_mapped(page))
try_to_free_swap(page);
- put_page(page);
pte_unmap_unlock(ptep, ptl);
- return 0;
+ if (vma->vm_flags & VM_LOCKED)
+ munlock_vma_page(page);
+ put_page(page);
+
+ err = 0;
+ unlock:
+ unlock_page(page);
+ return err;
}
/**
@@ -206,45 +215,23 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t opcode)
{
struct page *old_page, *new_page;
- struct address_space *mapping;
void *vaddr_old, *vaddr_new;
struct vm_area_struct *vma;
- struct uprobe *uprobe;
int ret;
+
retry:
/* Read the page with vaddr into memory */
ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
if (ret <= 0)
return ret;
- ret = -EINVAL;
-
- /*
- * We are interested in text pages only. Our pages of interest
- * should be mapped for read and execute only. We desist from
- * adding probes in write mapped pages since the breakpoints
- * might end up in the file copy.
- */
- if (!valid_vma(vma, is_swbp_insn(&opcode)))
- goto put_out;
-
- uprobe = container_of(auprobe, struct uprobe, arch);
- mapping = uprobe->inode->i_mapping;
- if (mapping != vma->vm_file->f_mapping)
- goto put_out;
-
ret = -ENOMEM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
if (!new_page)
- goto put_out;
+ goto put_old;
__SetPageUptodate(new_page);
- /*
- * lock page will serialize against do_wp_page()'s
- * PageAnon() handling
- */
- lock_page(old_page);
/* copy the page now that we've got it stable */
vaddr_old = kmap_atomic(old_page);
vaddr_new = kmap_atomic(new_page);
@@ -257,17 +244,13 @@ retry:
ret = anon_vma_prepare(vma);
if (ret)
- goto unlock_out;
+ goto put_new;
- lock_page(new_page);
- ret = __replace_page(vma, old_page, new_page);
- unlock_page(new_page);
+ ret = __replace_page(vma, vaddr, old_page, new_page);
-unlock_out:
- unlock_page(old_page);
+put_new:
page_cache_release(new_page);
-
-put_out:
+put_old:
put_page(old_page);
if (unlikely(ret == -EAGAIN))
@@ -791,7 +774,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
curr = info;
info->mm = vma->vm_mm;
- info->vaddr = vma_address(vma, offset);
+ info->vaddr = offset_to_vaddr(vma, offset);
}
mutex_unlock(&mapping->i_mmap_mutex);
@@ -839,12 +822,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
goto free;
down_write(&mm->mmap_sem);
- vma = find_vma(mm, (unsigned long)info->vaddr);
- if (!vma || !valid_vma(vma, is_register))
+ vma = find_vma(mm, info->vaddr);
+ if (!vma || !valid_vma(vma, is_register) ||
+ vma->vm_file->f_mapping->host != uprobe->inode)
goto unlock;
- if (vma->vm_file->f_mapping->host != uprobe->inode ||
- vma_address(vma, uprobe->offset) != info->vaddr)
+ if (vma->vm_start > info->vaddr ||
+ vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
goto unlock;
if (is_register) {
@@ -960,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
put_uprobe(uprobe);
}
-/*
- * Of all the nodes that correspond to the given inode, return the node
- * with the least offset.
- */
-static struct rb_node *find_least_offset_node(struct inode *inode)
+static struct rb_node *
+find_node_in_range(struct inode *inode, loff_t min, loff_t max)
{
- struct uprobe u = { .inode = inode, .offset = 0};
struct rb_node *n = uprobes_tree.rb_node;
- struct rb_node *close_node = NULL;
- struct uprobe *uprobe;
- int match;
while (n) {
- uprobe = rb_entry(n, struct uprobe, rb_node);
- match = match_uprobe(&u, uprobe);
-
- if (uprobe->inode == inode)
- close_node = n;
-
- if (!match)
- return close_node;
+ struct uprobe *u = rb_entry(n, struct uprobe, rb_node);
- if (match < 0)
+ if (inode < u->inode) {
n = n->rb_left;
- else
+ } else if (inode > u->inode) {
n = n->rb_right;
+ } else {
+ if (max < u->offset)
+ n = n->rb_left;
+ else if (min > u->offset)
+ n = n->rb_right;
+ else
+ break;
+ }
}
- return close_node;
+ return n;
}
/*
- * For a given inode, build a list of probes that need to be inserted.
+ * For a given range in vma, build a list of probes that need to be inserted.
*/
-static void build_probe_list(struct inode *inode, struct list_head *head)
+static void build_probe_list(struct inode *inode,
+ struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *head)
{
- struct uprobe *uprobe;
+ loff_t min, max;
unsigned long flags;
- struct rb_node *n;
-
- spin_lock_irqsave(&uprobes_treelock, flags);
-
- n = find_least_offset_node(inode);
+ struct rb_node *n, *t;
+ struct uprobe *u;
- for (; n; n = rb_next(n)) {
- uprobe = rb_entry(n, struct uprobe, rb_node);
- if (uprobe->inode != inode)
- break;
+ INIT_LIST_HEAD(head);
+ min = vaddr_to_offset(vma, start);
+ max = min + (end - start) - 1;
- list_add(&uprobe->pending_list, head);
- atomic_inc(&uprobe->ref);
+ spin_lock_irqsave(&uprobes_treelock, flags);
+ n = find_node_in_range(inode, min, max);
+ if (n) {
+ for (t = n; t; t = rb_prev(t)) {
+ u = rb_entry(t, struct uprobe, rb_node);
+ if (u->inode != inode || u->offset < min)
+ break;
+ list_add(&u->pending_list, head);
+ atomic_inc(&u->ref);
+ }
+ for (t = n; (t = rb_next(t)); ) {
+ u = rb_entry(t, struct uprobe, rb_node);
+ if (u->inode != inode || u->offset > max)
+ break;
+ list_add(&u->pending_list, head);
+ atomic_inc(&u->ref);
+ }
}
-
spin_unlock_irqrestore(&uprobes_treelock, flags);
}
@@ -1031,7 +1022,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
int uprobe_mmap(struct vm_area_struct *vma)
{
struct list_head tmp_list;
- struct uprobe *uprobe;
+ struct uprobe *uprobe, *u;
struct inode *inode;
int ret, count;
@@ -1042,21 +1033,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
if (!inode)
return 0;
- INIT_LIST_HEAD(&tmp_list);
mutex_lock(uprobes_mmap_hash(inode));
- build_probe_list(inode, &tmp_list);
+ build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
ret = 0;
count = 0;
- list_for_each_entry(uprobe, &tmp_list, pending_list) {
+ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
if (!ret) {
- loff_t vaddr = vma_address(vma, uprobe->offset);
-
- if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
- put_uprobe(uprobe);
- continue;
- }
+ unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
/*
@@ -1097,12 +1082,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
struct list_head tmp_list;
- struct uprobe *uprobe;
+ struct uprobe *uprobe, *u;
struct inode *inode;
if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
return;
+ if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
+ return;
+
if (!atomic_read(&vma->vm_mm->uprobes_state.count))
return;
@@ -1110,21 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
if (!inode)
return;
- INIT_LIST_HEAD(&tmp_list);
mutex_lock(uprobes_mmap_hash(inode));
- build_probe_list(inode, &tmp_list);
-
- list_for_each_entry(uprobe, &tmp_list, pending_list) {
- loff_t vaddr = vma_address(vma, uprobe->offset);
-
- if (vaddr >= start && vaddr < end) {
- /*
- * An unregister could have removed the probe before
- * unmap. So check before we decrement the count.
- */
- if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
- atomic_dec(&vma->vm_mm->uprobes_state.count);
- }
+ build_probe_list(inode, vma, start, end, &tmp_list);
+
+ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+ unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
+ /*
+ * An unregister could have removed the probe before
+ * unmap. So check before we decrement the count.
+ */
+ if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
+ atomic_dec(&vma->vm_mm->uprobes_state.count);
put_uprobe(uprobe);
}
mutex_unlock(uprobes_mmap_hash(inode));
@@ -1463,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
vma = find_vma(mm, bp_vaddr);
if (vma && vma->vm_start <= bp_vaddr) {
if (valid_vma(vma, false)) {
- struct inode *inode;
- loff_t offset;
+ struct inode *inode = vma->vm_file->f_mapping->host;
+ loff_t offset = vaddr_to_offset(vma, bp_vaddr);
- inode = vma->vm_file->f_mapping->host;
- offset = bp_vaddr - vma->vm_start;
- offset += (vma->vm_pgoff << PAGE_SHIFT);
uprobe = find_uprobe(inode, offset);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index ff1cad3b7bd..3bd2280d79f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -114,6 +114,10 @@ int nr_processes(void)
return total;
}
+void __weak arch_release_task_struct(struct task_struct *tsk)
+{
+}
+
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
static struct kmem_cache *task_struct_cachep;
@@ -122,17 +126,17 @@ static inline struct task_struct *alloc_task_struct_node(int node)
return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
}
-void __weak arch_release_task_struct(struct task_struct *tsk) { }
-
static inline void free_task_struct(struct task_struct *tsk)
{
- arch_release_task_struct(tsk);
kmem_cache_free(task_struct_cachep, tsk);
}
#endif
+void __weak arch_release_thread_info(struct thread_info *ti)
+{
+}
+
#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
-void __weak arch_release_thread_info(struct thread_info *ti) { }
/*
* Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
@@ -150,7 +154,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
static inline void free_thread_info(struct thread_info *ti)
{
- arch_release_thread_info(ti);
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
@@ -164,7 +167,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
static void free_thread_info(struct thread_info *ti)
{
- arch_release_thread_info(ti);
kmem_cache_free(thread_info_cache, ti);
}
@@ -205,10 +207,12 @@ static void account_kernel_stack(struct thread_info *ti, int account)
void free_task(struct task_struct *tsk)
{
account_kernel_stack(tsk->stack, -1);
+ arch_release_thread_info(tsk->stack);
free_thread_info(tsk->stack);
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
+ arch_release_task_struct(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -298,23 +302,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
return NULL;
ti = alloc_thread_info_node(tsk, node);
- if (!ti) {
- free_task_struct(tsk);
- return NULL;
- }
+ if (!ti)
+ goto free_tsk;
err = arch_dup_task_struct(tsk, orig);
+ if (err)
+ goto free_ti;
- /*
- * We defer looking at err, because we will need this setup
- * for the clean up path to work correctly.
- */
tsk->stack = ti;
- setup_thread_stack(tsk, orig);
-
- if (err)
- goto out;
+ setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
clear_tsk_need_resched(tsk);
stackend = end_of_stack(tsk);
@@ -338,8 +335,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
return tsk;
-out:
+free_ti:
free_thread_info(ti);
+free_tsk:
free_task_struct(tsk);
return NULL;
}
@@ -383,16 +381,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
struct file *file;
if (mpnt->vm_flags & VM_DONTCOPY) {
- long pages = vma_pages(mpnt);
- mm->total_vm -= pages;
vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
- -pages);
+ -vma_pages(mpnt));
continue;
}
charge = 0;
if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned long len;
- len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ unsigned long len = vma_pages(mpnt);
+
if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
goto fail_nomem;
charge = len;
@@ -1310,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
p->memcg_batch.do_batch = 0;
p->memcg_batch.memcg = NULL;
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index e2b0fb9a0b3..3717e7b306e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2231,11 +2231,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* @uaddr2: the pi futex we will take prior to returning to user-space
*
* The caller will wait on uaddr and will be requeued by futex_requeue() to
- * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and
- * complete the acquisition of the rt_mutex prior to returning to userspace.
- * This ensures the rt_mutex maintains an owner when it has waiters; without
- * one, the pi logic wouldn't know which task to boost/deboost, if there was a
- * need to.
+ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
+ * without one, the pi logic would not know which task to boost/deboost, if
+ * there was a need to.
*
* We call schedule in futex_wait_queue_me() when we enqueue and return there
* via the following:
@@ -2272,6 +2272,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
struct futex_q q = futex_q_init;
int res, ret;
+ if (uaddr == uaddr2)
+ return -EINVAL;
+
if (!bitset)
return -EINVAL;
@@ -2343,7 +2346,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* signal. futex_unlock_pi() will not destroy the lock_ptr nor
* the pi_state.
*/
- WARN_ON(!&q.pi_state);
+ WARN_ON(!q.pi_state);
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
debug_rt_mutex_free_waiter(&rt_waiter);
@@ -2370,7 +2373,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* fault, unlock the rt_mutex and return the fault to userspace.
*/
if (ret == -EFAULT) {
- if (rt_mutex_owner(pi_mutex) == current)
+ if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
rt_mutex_unlock(pi_mutex);
} else if (ret == -EINTR) {
/*
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index bdb18032555..131ca176b49 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -133,7 +133,7 @@ irqreturn_t
handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
{
irqreturn_t retval = IRQ_NONE;
- unsigned int random = 0, irq = desc->irq_data.irq;
+ unsigned int flags = 0, irq = desc->irq_data.irq;
do {
irqreturn_t res;
@@ -161,7 +161,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
/* Fall through to add to randomness */
case IRQ_HANDLED:
- random |= action->flags;
+ flags |= action->flags;
break;
default:
@@ -172,8 +172,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
action = action->next;
} while (action);
- if (random & IRQF_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
+ add_interrupt_randomness(irq, flags);
if (!noirqdebug)
note_interrupt(irq, desc, retval);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 38c5eb839c9..49a77727db4 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -10,6 +10,7 @@
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/topology.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -45,7 +46,8 @@ static struct irq_domain *irq_domain_alloc(struct device_node *of_node,
{
struct irq_domain *domain;
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ domain = kzalloc_node(sizeof(*domain), GFP_KERNEL,
+ of_node_to_nid(of_node));
if (WARN_ON(!domain))
return NULL;
@@ -138,6 +140,36 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
}
/**
+ * irq_domain_add_simple() - Allocate and register a simple irq_domain.
+ * @of_node: pointer to interrupt controller's device tree node.
+ * @size: total number of irqs in mapping
+ * @first_irq: first number of irq block assigned to the domain
+ * @ops: map/unmap domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates a legacy irq_domain if irq_base is positive or a linear
+ * domain otherwise.
+ *
+ * This is intended to implement the expected behaviour for most
+ * interrupt controllers which is that a linear mapping should
+ * normally be used unless the system requires a legacy mapping in
+ * order to support supplying interrupt numbers during non-DT
+ * registration of devices.
+ */
+struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
+ unsigned int size,
+ unsigned int first_irq,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ if (first_irq > 0)
+ return irq_domain_add_legacy(of_node, size, first_irq, 0,
+ ops, host_data);
+ else
+ return irq_domain_add_linear(of_node, size, ops, host_data);
+}
+
+/**
* irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
* @of_node: pointer to interrupt controller's device tree node.
* @size: total number of irqs in legacy mapping
@@ -203,7 +235,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
* one can then use irq_create_mapping() to
* explicitly change them
*/
- ops->map(domain, irq, hwirq);
+ if (ops->map)
+ ops->map(domain, irq, hwirq);
/* Clear norequest flags */
irq_clear_status_flags(irq, IRQ_NOREQUEST);
@@ -215,7 +248,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
EXPORT_SYMBOL_GPL(irq_domain_add_legacy);
/**
- * irq_domain_add_linear() - Allocate and register a legacy revmap irq_domain.
+ * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
* @of_node: pointer to interrupt controller's device tree node.
* @size: Number of interrupts in the domain.
* @ops: map/unmap domain callbacks
@@ -229,7 +262,8 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
struct irq_domain *domain;
unsigned int *revmap;
- revmap = kzalloc(sizeof(*revmap) * size, GFP_KERNEL);
+ revmap = kzalloc_node(sizeof(*revmap) * size, GFP_KERNEL,
+ of_node_to_nid(of_node));
if (WARN_ON(!revmap))
return NULL;
@@ -330,24 +364,112 @@ void irq_set_default_host(struct irq_domain *domain)
}
EXPORT_SYMBOL_GPL(irq_set_default_host);
-static int irq_setup_virq(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq)
+static void irq_domain_disassociate_many(struct irq_domain *domain,
+ unsigned int irq_base, int count)
{
- struct irq_data *irq_data = irq_get_irq_data(virq);
+ /*
+ * disassociate in reverse order;
+ * not strictly necessary, but nice for unwinding
+ */
+ while (count--) {
+ int irq = irq_base + count;
+ struct irq_data *irq_data = irq_get_irq_data(irq);
+ irq_hw_number_t hwirq = irq_data->hwirq;
+
+ if (WARN_ON(!irq_data || irq_data->domain != domain))
+ continue;
+
+ irq_set_status_flags(irq, IRQ_NOREQUEST);
+
+ /* remove chip and handler */
+ irq_set_chip_and_handler(irq, NULL, NULL);
+
+ /* Make sure it's completed */
+ synchronize_irq(irq);
+
+ /* Tell the PIC about it */
+ if (domain->ops->unmap)
+ domain->ops->unmap(domain, irq);
+ smp_mb();
- irq_data->hwirq = hwirq;
- irq_data->domain = domain;
- if (domain->ops->map(domain, virq, hwirq)) {
- pr_debug("irq-%i==>hwirq-0x%lx mapping failed\n", virq, hwirq);
irq_data->domain = NULL;
irq_data->hwirq = 0;
- return -1;
+
+ /* Clear reverse map */
+ switch(domain->revmap_type) {
+ case IRQ_DOMAIN_MAP_LINEAR:
+ if (hwirq < domain->revmap_data.linear.size)
+ domain->revmap_data.linear.revmap[hwirq] = 0;
+ break;
+ case IRQ_DOMAIN_MAP_TREE:
+ mutex_lock(&revmap_trees_mutex);
+ radix_tree_delete(&domain->revmap_data.tree, hwirq);
+ mutex_unlock(&revmap_trees_mutex);
+ break;
+ }
}
+}
+
+int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
+ irq_hw_number_t hwirq_base, int count)
+{
+ unsigned int virq = irq_base;
+ irq_hw_number_t hwirq = hwirq_base;
+ int i, ret;
+
+ pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__,
+ of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count);
+
+ for (i = 0; i < count; i++) {
+ struct irq_data *irq_data = irq_get_irq_data(virq + i);
+
+ if (WARN(!irq_data, "error: irq_desc not allocated; "
+ "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i))
+ return -EINVAL;
+ if (WARN(irq_data->domain, "error: irq_desc already associated; "
+ "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i))
+ return -EINVAL;
+ };
+
+ for (i = 0; i < count; i++, virq++, hwirq++) {
+ struct irq_data *irq_data = irq_get_irq_data(virq);
+
+ irq_data->hwirq = hwirq;
+ irq_data->domain = domain;
+ if (domain->ops->map) {
+ ret = domain->ops->map(domain, virq, hwirq);
+ if (ret != 0) {
+ pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n",
+ virq, hwirq, ret);
+ WARN_ON(1);
+ irq_data->domain = NULL;
+ irq_data->hwirq = 0;
+ goto err_unmap;
+ }
+ }
- irq_clear_status_flags(virq, IRQ_NOREQUEST);
+ switch (domain->revmap_type) {
+ case IRQ_DOMAIN_MAP_LINEAR:
+ if (hwirq < domain->revmap_data.linear.size)
+ domain->revmap_data.linear.revmap[hwirq] = virq;
+ break;
+ case IRQ_DOMAIN_MAP_TREE:
+ mutex_lock(&revmap_trees_mutex);
+ radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data);
+ mutex_unlock(&revmap_trees_mutex);
+ break;
+ }
+
+ irq_clear_status_flags(virq, IRQ_NOREQUEST);
+ }
return 0;
+
+ err_unmap:
+ irq_domain_disassociate_many(domain, irq_base, i);
+ return -EINVAL;
}
+EXPORT_SYMBOL_GPL(irq_domain_associate_many);
/**
* irq_create_direct_mapping() - Allocate an irq for direct mapping
@@ -364,10 +486,10 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
if (domain == NULL)
domain = irq_default_domain;
- BUG_ON(domain == NULL);
- WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP);
+ if (WARN_ON(!domain || domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP))
+ return 0;
- virq = irq_alloc_desc_from(1, 0);
+ virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node));
if (!virq) {
pr_debug("create_direct virq allocation failed\n");
return 0;
@@ -380,7 +502,7 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
}
pr_debug("create_direct obtained virq %d\n", virq);
- if (irq_setup_virq(domain, virq, virq)) {
+ if (irq_domain_associate(domain, virq, virq)) {
irq_free_desc(virq);
return 0;
}
@@ -433,17 +555,16 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
hint = hwirq % nr_irqs;
if (hint == 0)
hint++;
- virq = irq_alloc_desc_from(hint, 0);
+ virq = irq_alloc_desc_from(hint, of_node_to_nid(domain->of_node));
if (virq <= 0)
- virq = irq_alloc_desc_from(1, 0);
+ virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node));
if (virq <= 0) {
pr_debug("-> virq allocation failed\n");
return 0;
}
- if (irq_setup_virq(domain, virq, hwirq)) {
- if (domain->revmap_type != IRQ_DOMAIN_MAP_LEGACY)
- irq_free_desc(virq);
+ if (irq_domain_associate(domain, virq, hwirq)) {
+ irq_free_desc(virq);
return 0;
}
@@ -454,6 +575,44 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
}
EXPORT_SYMBOL_GPL(irq_create_mapping);
+/**
+ * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs
+ * @domain: domain owning the interrupt range
+ * @irq_base: beginning of linux IRQ range
+ * @hwirq_base: beginning of hardware IRQ range
+ * @count: Number of interrupts to map
+ *
+ * This routine is used for allocating and mapping a range of hardware
+ * irqs to linux irqs where the linux irq numbers are at pre-defined
+ * locations. For use by controllers that already have static mappings
+ * to insert in to the domain.
+ *
+ * Non-linear users can use irq_create_identity_mapping() for IRQ-at-a-time
+ * domain insertion.
+ *
+ * 0 is returned upon success, while any failure to establish a static
+ * mapping is treated as an error.
+ */
+int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base,
+ irq_hw_number_t hwirq_base, int count)
+{
+ int ret;
+
+ ret = irq_alloc_descs(irq_base, irq_base, count,
+ of_node_to_nid(domain->of_node));
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = irq_domain_associate_many(domain, irq_base, hwirq_base, count);
+ if (unlikely(ret < 0)) {
+ irq_free_descs(irq_base, count);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_create_strict_mappings);
+
unsigned int irq_create_of_mapping(struct device_node *controller,
const u32 *intspec, unsigned int intsize)
{
@@ -511,7 +670,6 @@ void irq_dispose_mapping(unsigned int virq)
{
struct irq_data *irq_data = irq_get_irq_data(virq);
struct irq_domain *domain;
- irq_hw_number_t hwirq;
if (!virq || !irq_data)
return;
@@ -524,33 +682,7 @@ void irq_dispose_mapping(unsigned int virq)
if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
return;
- irq_set_status_flags(virq, IRQ_NOREQUEST);
-
- /* remove chip and handler */
- irq_set_chip_and_handler(virq, NULL, NULL);
-
- /* Make sure it's completed */
- synchronize_irq(virq);
-
- /* Tell the PIC about it */
- if (domain->ops->unmap)
- domain->ops->unmap(domain, virq);
- smp_mb();
-
- /* Clear reverse map */
- hwirq = irq_data->hwirq;
- switch(domain->revmap_type) {
- case IRQ_DOMAIN_MAP_LINEAR:
- if (hwirq < domain->revmap_data.linear.size)
- domain->revmap_data.linear.revmap[hwirq] = 0;
- break;
- case IRQ_DOMAIN_MAP_TREE:
- mutex_lock(&revmap_trees_mutex);
- radix_tree_delete(&domain->revmap_data.tree, hwirq);
- mutex_unlock(&revmap_trees_mutex);
- break;
- }
-
+ irq_domain_disassociate_many(domain, virq, 1);
irq_free_desc(virq);
}
EXPORT_SYMBOL_GPL(irq_dispose_mapping);
@@ -559,16 +691,11 @@ EXPORT_SYMBOL_GPL(irq_dispose_mapping);
* irq_find_mapping() - Find a linux irq from an hw irq number.
* @domain: domain owning this hardware interrupt
* @hwirq: hardware irq number in that domain space
- *
- * This is a slow path, for use by generic code. It's expected that an
- * irq controller implementation directly calls the appropriate low level
- * mapping function.
*/
unsigned int irq_find_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
- unsigned int i;
- unsigned int hint = hwirq % nr_irqs;
+ struct irq_data *data;
/* Look for default domain if nececssary */
if (domain == NULL)
@@ -576,115 +703,47 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
if (domain == NULL)
return 0;
- /* legacy -> bail early */
- if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
+ switch (domain->revmap_type) {
+ case IRQ_DOMAIN_MAP_LEGACY:
return irq_domain_legacy_revmap(domain, hwirq);
-
- /* Slow path does a linear search of the map */
- if (hint == 0)
- hint = 1;
- i = hint;
- do {
- struct irq_data *data = irq_get_irq_data(i);
+ case IRQ_DOMAIN_MAP_LINEAR:
+ return irq_linear_revmap(domain, hwirq);
+ case IRQ_DOMAIN_MAP_TREE:
+ rcu_read_lock();
+ data = radix_tree_lookup(&domain->revmap_data.tree, hwirq);
+ rcu_read_unlock();
+ if (data)
+ return data->irq;
+ break;
+ case IRQ_DOMAIN_MAP_NOMAP:
+ data = irq_get_irq_data(hwirq);
if (data && (data->domain == domain) && (data->hwirq == hwirq))
- return i;
- i++;
- if (i >= nr_irqs)
- i = 1;
- } while(i != hint);
+ return hwirq;
+ break;
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(irq_find_mapping);
/**
- * irq_radix_revmap_lookup() - Find a linux irq from a hw irq number.
- * @domain: domain owning this hardware interrupt
- * @hwirq: hardware irq number in that domain space
- *
- * This is a fast path, for use by irq controller code that uses radix tree
- * revmaps
- */
-unsigned int irq_radix_revmap_lookup(struct irq_domain *domain,
- irq_hw_number_t hwirq)
-{
- struct irq_data *irq_data;
-
- if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_TREE))
- return irq_find_mapping(domain, hwirq);
-
- /*
- * Freeing an irq can delete nodes along the path to
- * do the lookup via call_rcu.
- */
- rcu_read_lock();
- irq_data = radix_tree_lookup(&domain->revmap_data.tree, hwirq);
- rcu_read_unlock();
-
- /*
- * If found in radix tree, then fine.
- * Else fallback to linear lookup - this should not happen in practice
- * as it means that we failed to insert the node in the radix tree.
- */
- return irq_data ? irq_data->irq : irq_find_mapping(domain, hwirq);
-}
-EXPORT_SYMBOL_GPL(irq_radix_revmap_lookup);
-
-/**
- * irq_radix_revmap_insert() - Insert a hw irq to linux irq number mapping.
- * @domain: domain owning this hardware interrupt
- * @virq: linux irq number
- * @hwirq: hardware irq number in that domain space
- *
- * This is for use by irq controllers that use a radix tree reverse
- * mapping for fast lookup.
- */
-void irq_radix_revmap_insert(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq)
-{
- struct irq_data *irq_data = irq_get_irq_data(virq);
-
- if (WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_TREE))
- return;
-
- if (virq) {
- mutex_lock(&revmap_trees_mutex);
- radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data);
- mutex_unlock(&revmap_trees_mutex);
- }
-}
-EXPORT_SYMBOL_GPL(irq_radix_revmap_insert);
-
-/**
* irq_linear_revmap() - Find a linux irq from a hw irq number.
* @domain: domain owning this hardware interrupt
* @hwirq: hardware irq number in that domain space
*
- * This is a fast path, for use by irq controller code that uses linear
- * revmaps. It does fallback to the slow path if the revmap doesn't exist
- * yet and will create the revmap entry with appropriate locking
+ * This is a fast path that can be called directly by irq controller code to
+ * save a handful of instructions.
*/
unsigned int irq_linear_revmap(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
- unsigned int *revmap;
-
- if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR))
- return irq_find_mapping(domain, hwirq);
+ BUG_ON(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR);
- /* Check revmap bounds */
- if (unlikely(hwirq >= domain->revmap_data.linear.size))
- return irq_find_mapping(domain, hwirq);
-
- /* Check if revmap was allocated */
- revmap = domain->revmap_data.linear.revmap;
- if (unlikely(revmap == NULL))
- return irq_find_mapping(domain, hwirq);
-
- /* Fill up revmap with slow path if no mapping found */
- if (unlikely(!revmap[hwirq]))
- revmap[hwirq] = irq_find_mapping(domain, hwirq);
+ /* Check revmap bounds; complain if exceeded */
+ if (WARN_ON(hwirq >= domain->revmap_data.linear.size))
+ return 0;
- return revmap[hwirq];
+ return domain->revmap_data.linear.revmap[hwirq];
}
EXPORT_SYMBOL_GPL(irq_linear_revmap);
@@ -761,12 +820,6 @@ static int __init irq_debugfs_init(void)
__initcall(irq_debugfs_init);
#endif /* CONFIG_IRQ_DOMAIN_DEBUG */
-static int irq_domain_simple_map(struct irq_domain *d, unsigned int irq,
- irq_hw_number_t hwirq)
-{
- return 0;
-}
-
/**
* irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings
*
@@ -829,7 +882,6 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d,
EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell);
const struct irq_domain_ops irq_domain_simple_ops = {
- .map = irq_domain_simple_map,
.xlate = irq_domain_xlate_onetwocell,
};
EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 814c9ef6bba..4c69326aa77 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -893,22 +893,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
return -ENOSYS;
if (!try_module_get(desc->owner))
return -ENODEV;
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & IRQF_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
/*
* Check whether the interrupt nests into another interrupt
@@ -960,6 +944,18 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
/*
+ * Drivers are often written to work w/o knowledge about the
+ * underlying irq chip implementation, so a request for a
+ * threaded irq without a primary hard irq context handler
+ * requires the ONESHOT flag to be set. Some irq chips like
+ * MSI based interrupts are per se one shot safe. Check the
+ * chip flags, so we can avoid the unmask dance at the end of
+ * the threaded handler for those.
+ */
+ if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
+ new->flags &= ~IRQF_ONESHOT;
+
+ /*
* The following block of code has to be executed atomically
*/
raw_spin_lock_irqsave(&desc->lock, flags);
@@ -1033,7 +1029,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
new->thread_mask = 1 << ffz(thread_mask);
- } else if (new->handler == irq_default_primary_handler) {
+ } else if (new->handler == irq_default_primary_handler &&
+ !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) {
/*
* The interrupt was requested with handler = NULL, so
* we use the default primary handler for it. But it
@@ -1354,7 +1351,6 @@ EXPORT_SYMBOL(free_irq);
* Flags:
*
* IRQF_SHARED Interrupt is shared
- * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
* IRQF_TRIGGER_* Specify active edge(s) or level
*
*/
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4e2e472f6ae..0668d58d641 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1424,7 +1424,7 @@ static void update_vmcoreinfo_note(void)
void crash_save_vmcoreinfo(void)
{
- vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
+ vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
update_vmcoreinfo_note();
}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index ff2c7cb86d7..6f99aead66c 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -45,6 +45,13 @@ extern int max_threads;
static struct workqueue_struct *khelper_wq;
+/*
+ * kmod_thread_locker is used for deadlock avoidance. There is no explicit
+ * locking to protect this global - it is private to the singleton khelper
+ * thread and should only ever be modified by that thread.
+ */
+static const struct task_struct *kmod_thread_locker;
+
#define CAP_BSET (void *)1
#define CAP_PI (void *)2
@@ -221,6 +228,13 @@ fail:
return 0;
}
+static int call_helper(void *data)
+{
+ /* Worker thread started blocking khelper thread. */
+ kmod_thread_locker = current;
+ return ____call_usermodehelper(data);
+}
+
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
@@ -295,9 +309,12 @@ static void __call_usermodehelper(struct work_struct *work)
if (wait == UMH_WAIT_PROC)
pid = kernel_thread(wait_for_helper, sub_info,
CLONE_FS | CLONE_FILES | SIGCHLD);
- else
- pid = kernel_thread(____call_usermodehelper, sub_info,
+ else {
+ pid = kernel_thread(call_helper, sub_info,
CLONE_VFORK | SIGCHLD);
+ /* Worker thread stopped blocking khelper thread. */
+ kmod_thread_locker = NULL;
+ }
switch (wait) {
case UMH_NO_WAIT:
@@ -548,6 +565,16 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
retval = -EBUSY;
goto out;
}
+ /*
+ * Worker thread must not wait for khelper thread at below
+ * wait_for_completion() if the thread was created with CLONE_VFORK
+ * flag, for khelper thread is already waiting for the thread at
+ * wait_for_completion() in do_fork().
+ */
+ if (wait != UMH_NO_WAIT && current == kmod_thread_locker) {
+ retval = -EBUSY;
+ goto out;
+ }
sub_info->complete = &done;
sub_info->wait = wait;
@@ -577,6 +604,12 @@ unlock:
return retval;
}
+/*
+ * call_usermodehelper_fns() will not run the caller-provided cleanup function
+ * if a memory allocation failure is experienced. So the caller might need to
+ * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform
+ * the necessaary cleanup within the caller.
+ */
int call_usermodehelper_fns(
char *path, char **argv, char **envp, int wait,
int (*init)(struct subprocess_info *info, struct cred *new),
diff --git a/kernel/panic.c b/kernel/panic.c
index d2a5f4ecc6d..e1b2822fff9 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,6 +75,14 @@ void panic(const char *fmt, ...)
int state = 0;
/*
+ * Disable local interrupts. This will prevent panic_smp_self_stop
+ * from deadlocking the first cpu that invokes the panic, since
+ * there is nothing to prevent an interrupt handler (that runs
+ * after the panic_lock is acquired) from invoking panic again.
+ */
+ local_irq_disable();
+
+ /*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
diff --git a/kernel/printk.c b/kernel/printk.c
index 50c96b5651b..66a2ea37b57 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -389,8 +389,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv,
line = buf;
for (i = 0; i < count; i++) {
- if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len))
+ if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) {
+ ret = -EFAULT;
goto out;
+ }
line += iv[i].iov_len;
}
@@ -1032,6 +1034,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
struct log *msg = log_from_idx(idx);
len += msg_print_text(msg, prev, true, NULL, 0);
+ prev = msg->flags;
idx = log_next(idx);
seq++;
}
@@ -1044,6 +1047,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
struct log *msg = log_from_idx(idx);
len -= msg_print_text(msg, prev, true, NULL, 0);
+ prev = msg->flags;
idx = log_next(idx);
seq++;
}
@@ -1540,17 +1544,23 @@ asmlinkage int vprintk_emit(int facility, int level,
lflags |= LOG_NEWLINE;
}
- /* strip syslog prefix and extract log level or control flags */
- if (text[0] == '<' && text[1] && text[2] == '>') {
- switch (text[1]) {
- case '0' ... '7':
- if (level == -1)
- level = text[1] - '0';
- case 'd': /* KERN_DEFAULT */
- lflags |= LOG_PREFIX;
- case 'c': /* KERN_CONT */
- text += 3;
- text_len -= 3;
+ /* strip kernel syslog prefix and extract log level or control flags */
+ if (facility == 0) {
+ int kern_level = printk_get_level(text);
+
+ if (kern_level) {
+ const char *end_of_header = printk_skip_level(text);
+ switch (kern_level) {
+ case '0' ... '7':
+ if (level == -1)
+ level = kern_level - '0';
+ case 'd': /* KERN_DEFAULT */
+ lflags |= LOG_PREFIX;
+ case 'c': /* KERN_CONT */
+ break;
+ }
+ text_len -= end_of_header - text;
+ text = (char *)end_of_header;
}
}
diff --git a/kernel/resource.c b/kernel/resource.c
index dc8b4776444..34d45886ee8 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -7,6 +7,8 @@
* Arbitrary resource management.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/ioport.h>
@@ -791,8 +793,28 @@ void __init reserve_region_with_split(struct resource *root,
resource_size_t start, resource_size_t end,
const char *name)
{
+ int abort = 0;
+
write_lock(&resource_lock);
- __reserve_region_with_split(root, start, end, name);
+ if (root->start > start || root->end < end) {
+ pr_err("requested range [0x%llx-0x%llx] not in root %pr\n",
+ (unsigned long long)start, (unsigned long long)end,
+ root);
+ if (start > root->end || end < root->start)
+ abort = 1;
+ else {
+ if (end > root->end)
+ end = root->end;
+ if (start < root->start)
+ start = root->start;
+ pr_err("fixing request to [0x%llx-0x%llx]\n",
+ (unsigned long long)start,
+ (unsigned long long)end);
+ }
+ dump_stack();
+ }
+ if (!abort)
+ __reserve_region_with_split(root, start, end, name);
write_unlock(&resource_lock);
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5d011ef4c0d..fbf1fd098dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1910,12 +1910,12 @@ static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
+ trace_sched_switch(prev, next);
sched_info_switch(prev, next);
perf_event_task_sched_out(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
- trace_sched_switch(prev, next);
}
/**
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+ u64 temp = (__force u64) rtime;
+
+ temp *= (__force u64) utime;
+
+ if (sizeof(cputime_t) == 4)
+ temp = div_u64(temp, (__force u32) total);
+ else
+ temp = div64_u64(temp, (__force u64) total);
+
+ return (__force cputime_t) temp;
+}
+
void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
*/
rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
- if (total) {
- u64 temp = (__force u64) rtime;
-
- temp *= (__force u64) utime;
- do_div(temp, (__force u32) total);
- utime = (__force cputime_t) temp;
- } else
+ if (total)
+ utime = scale_utime(utime, rtime, total);
+ else
utime = rtime;
/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
total = cputime.utime + cputime.stime;
rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
- if (total) {
- u64 temp = (__force u64) rtime;
-
- temp *= (__force u64) cputime.utime;
- do_div(temp, (__force u32) total);
- utime = (__force cputime_t) temp;
- } else
+ if (total)
+ utime = scale_utime(cputime.utime, rtime, total);
+ else
utime = rtime;
sig->prev_utime = max(sig->prev_utime, utime);
@@ -4340,9 +4346,7 @@ recheck:
*/
if (unlikely(policy == p->policy && (!rt_policy(policy) ||
param->sched_priority == p->rt_priority))) {
-
- __task_rq_unlock(rq);
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ task_rq_unlock(rq, p, &flags);
return 0;
}
@@ -7248,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
#ifdef CONFIG_CGROUP_SCHED
struct task_group root_task_group;
+LIST_HEAD(task_groups);
#endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index d72586fdf66..23aa789c53e 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -65,8 +65,8 @@ static int convert_prio(int prio)
int cpupri_find(struct cpupri *cp, struct task_struct *p,
struct cpumask *lowest_mask)
{
- int idx = 0;
- int task_pri = convert_prio(p->prio);
+ int idx = 0;
+ int task_pri = convert_prio(p->prio);
if (task_pri >= MAX_RT_PRIO)
return 0;
@@ -137,9 +137,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
*/
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
{
- int *currpri = &cp->cpu_to_pri[cpu];
- int oldpri = *currpri;
- int do_mb = 0;
+ int *currpri = &cp->cpu_to_pri[cpu];
+ int oldpri = *currpri;
+ int do_mb = 0;
newpri = convert_prio(newpri);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 22321db6495..c219bf8d704 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3069,6 +3069,9 @@ struct lb_env {
int new_dst_cpu;
enum cpu_idle_type idle;
long imbalance;
+ /* The set of CPUs under consideration for load-balancing */
+ struct cpumask *cpus;
+
unsigned int flags;
unsigned int loop;
@@ -3384,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
static void update_h_load(long cpu)
{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long now = jiffies;
+
+ if (rq->h_load_throttle == now)
+ return;
+
+ rq->h_load_throttle = now;
+
rcu_read_lock();
walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
rcu_read_unlock();
@@ -3653,8 +3664,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
*/
static inline void update_sg_lb_stats(struct lb_env *env,
struct sched_group *group, int load_idx,
- int local_group, const struct cpumask *cpus,
- int *balance, struct sg_lb_stats *sgs)
+ int local_group, int *balance, struct sg_lb_stats *sgs)
{
unsigned long nr_running, max_nr_running, min_nr_running;
unsigned long load, max_cpu_load, min_cpu_load;
@@ -3671,7 +3681,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
max_nr_running = 0;
min_nr_running = ~0UL;
- for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+ for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
struct rq *rq = cpu_rq(i);
nr_running = rq->nr_running;
@@ -3800,8 +3810,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* @sds: variable to hold the statistics for this sched_domain.
*/
static inline void update_sd_lb_stats(struct lb_env *env,
- const struct cpumask *cpus,
- int *balance, struct sd_lb_stats *sds)
+ int *balance, struct sd_lb_stats *sds)
{
struct sched_domain *child = env->sd->child;
struct sched_group *sg = env->sd->groups;
@@ -3818,8 +3827,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(env, sg, load_idx, local_group,
- cpus, balance, &sgs);
+ update_sg_lb_stats(env, sg, load_idx, local_group, balance, &sgs);
if (local_group && !(*balance))
return;
@@ -4055,7 +4063,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* to restore balance.
*
* @env: The load balancing environment.
- * @cpus: The set of CPUs under consideration for load-balancing.
* @balance: Pointer to a variable indicating if this_cpu
* is the appropriate cpu to perform load balancing at this_level.
*
@@ -4065,7 +4072,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* put to idle by rebalancing its tasks onto our group.
*/
static struct sched_group *
-find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
+find_busiest_group(struct lb_env *env, int *balance)
{
struct sd_lb_stats sds;
@@ -4075,7 +4082,7 @@ find_busiest_group(struct lb_env *env, const struct cpumask *cpus, int *balance)
* Compute the various statistics relavent for load balancing at
* this level.
*/
- update_sd_lb_stats(env, cpus, balance, &sds);
+ update_sd_lb_stats(env, balance, &sds);
/*
* this_cpu is not the appropriate cpu to perform load balancing at
@@ -4155,8 +4162,7 @@ ret:
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
static struct rq *find_busiest_queue(struct lb_env *env,
- struct sched_group *group,
- const struct cpumask *cpus)
+ struct sched_group *group)
{
struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
@@ -4171,7 +4177,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
if (!capacity)
capacity = fix_small_capacity(env->sd, group);
- if (!cpumask_test_cpu(i, cpus))
+ if (!cpumask_test_cpu(i, env->cpus))
continue;
rq = cpu_rq(i);
@@ -4252,6 +4258,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
.dst_grpmask = sched_group_cpus(sd->groups),
.idle = idle,
.loop_break = sched_nr_migrate_break,
+ .cpus = cpus,
};
cpumask_copy(cpus, cpu_active_mask);
@@ -4260,7 +4267,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
schedstat_inc(sd, lb_count[idle]);
redo:
- group = find_busiest_group(&env, cpus, balance);
+ group = find_busiest_group(&env, balance);
if (*balance == 0)
goto out_balanced;
@@ -4270,7 +4277,7 @@ redo:
goto out_balanced;
}
- busiest = find_busiest_queue(&env, group, cpus);
+ busiest = find_busiest_queue(&env, group);
if (!busiest) {
schedstat_inc(sd, lb_nobusyq[idle]);
goto out_balanced;
@@ -4294,11 +4301,10 @@ redo:
env.src_rq = busiest;
env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
+ update_h_load(env.src_cpu);
more_balance:
local_irq_save(flags);
double_rq_lock(this_rq, busiest);
- if (!env.loop)
- update_h_load(env.src_cpu);
/*
* cur_ld_moved - load moved in current iteration
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca0110..944cb68420e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
const struct cpumask *span;
span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+ /*
+ * FIXME: isolated CPUs should really leave the root task group,
+ * whether they are isolcpus or were isolated via cpusets, lest
+ * the timer run on a CPU which does not service all runqueues,
+ * potentially leaving other CPUs indefinitely throttled. If
+ * isolation is really required, the user will turn the throttle
+ * off to kill the perturbations it causes anyway. Meanwhile,
+ * this maintains functionality for boot and/or troubleshooting.
+ */
+ if (rt_b == &root_task_group.rt_bandwidth)
+ span = cpu_online_mask;
+#endif
for_each_cpu(i, span) {
int enqueue = 0;
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7dd4d..f6714d009e7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
struct cfs_rq;
struct rt_rq;
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+ unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
#ifdef CONFIG_RT_GROUP_SCHED
struct list_head leaf_rt_rq_list;
#endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e86fd2..da5eb5bed84 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
{
struct task_struct *stop = rq->stop;
- if (stop && stop->on_rq)
+ if (stop && stop->on_rq) {
+ stop->se.exec_start = rq->clock_task;
return stop;
+ }
return NULL;
}
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
+ struct task_struct *curr = rq->curr;
+ u64 delta_exec;
+
+ delta_exec = rq->clock_task - curr->se.exec_start;
+ if (unlikely((s64)delta_exec < 0))
+ delta_exec = 0;
+
+ schedstat_set(curr->se.statistics.exec_max,
+ max(curr->se.statistics.exec_max, delta_exec));
+
+ curr->se.sum_exec_runtime += delta_exec;
+ account_group_exec_runtime(curr, delta_exec);
+
+ curr->se.exec_start = rq->clock_task;
+ cpuacct_charge(curr, delta_exec);
}
static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
static void set_curr_task_stop(struct rq *rq)
{
+ struct task_struct *stop = rq->stop;
+
+ stop->se.exec_start = rq->clock_task;
}
static void switched_to_stop(struct rq *rq, struct task_struct *p)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 671f9594e36..b73e681df09 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -210,6 +210,14 @@ asmlinkage void __do_softirq(void)
__u32 pending;
int max_restart = MAX_SOFTIRQ_RESTART;
int cpu;
+ unsigned long old_flags = current->flags;
+
+ /*
+ * Mask out PF_MEMALLOC s current task context is borrowed for the
+ * softirq. A softirq handled such as network RX might set PF_MEMALLOC
+ * again if the socket is related to swap
+ */
+ current->flags &= ~PF_MEMALLOC;
pending = local_softirq_pending();
account_system_vtime(current);
@@ -265,6 +273,7 @@ restart:
account_system_vtime(current);
__local_bh_enable(SOFTIRQ_OFFSET);
+ tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}
#ifndef __ARCH_HAS_DO_SOFTIRQ
diff --git a/kernel/sys.c b/kernel/sys.c
index 2d39a84cd85..241507f23ec 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2015,7 +2015,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
break;
}
me->pdeath_signal = arg2;
- error = 0;
break;
case PR_GET_PDEATHSIG:
error = put_user(me->pdeath_signal, (int __user *)arg2);
@@ -2029,7 +2028,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
break;
}
set_dumpable(me->mm, arg2);
- error = 0;
break;
case PR_SET_UNALIGN:
@@ -2056,10 +2054,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_TIMING:
if (arg2 != PR_TIMING_STATISTICAL)
error = -EINVAL;
- else
- error = 0;
break;
-
case PR_SET_NAME:
comm[sizeof(me->comm)-1] = 0;
if (strncpy_from_user(comm, (char __user *)arg2,
@@ -2067,20 +2062,19 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return -EFAULT;
set_task_comm(me, comm);
proc_comm_connector(me);
- return 0;
+ break;
case PR_GET_NAME:
get_task_comm(comm, me);
if (copy_to_user((char __user *)arg2, comm,
sizeof(comm)))
return -EFAULT;
- return 0;
+ break;
case PR_GET_ENDIAN:
error = GET_ENDIAN(me, arg2);
break;
case PR_SET_ENDIAN:
error = SET_ENDIAN(me, arg2);
break;
-
case PR_GET_SECCOMP:
error = prctl_get_seccomp();
break;
@@ -2108,7 +2102,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
current->default_timer_slack_ns;
else
current->timer_slack_ns = arg2;
- error = 0;
break;
case PR_MCE_KILL:
if (arg4 | arg5)
@@ -2134,7 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
default:
return -EINVAL;
}
- error = 0;
break;
case PR_MCE_KILL_GET:
if (arg2 | arg3 | arg4 | arg5)
@@ -2153,7 +2145,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
break;
case PR_SET_CHILD_SUBREAPER:
me->signal->is_child_subreaper = !!arg2;
- error = 0;
break;
case PR_GET_CHILD_SUBREAPER:
error = put_user(me->signal->is_child_subreaper,
@@ -2195,46 +2186,52 @@ static void argv_cleanup(struct subprocess_info *info)
argv_free(info->argv);
}
-/**
- * orderly_poweroff - Trigger an orderly system poweroff
- * @force: force poweroff if command execution fails
- *
- * This may be called from any context to trigger a system shutdown.
- * If the orderly shutdown fails, it will force an immediate shutdown.
- */
-int orderly_poweroff(bool force)
+static int __orderly_poweroff(void)
{
int argc;
- char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
+ char **argv;
static char *envp[] = {
"HOME=/",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
NULL
};
- int ret = -ENOMEM;
+ int ret;
+ argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
if (argv == NULL) {
printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
__func__, poweroff_cmd);
- goto out;
+ return -ENOMEM;
}
ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
NULL, argv_cleanup, NULL);
-out:
- if (likely(!ret))
- return 0;
-
if (ret == -ENOMEM)
argv_free(argv);
- if (force) {
+ return ret;
+}
+
+/**
+ * orderly_poweroff - Trigger an orderly system poweroff
+ * @force: force poweroff if command execution fails
+ *
+ * This may be called from any context to trigger a system shutdown.
+ * If the orderly shutdown fails, it will force an immediate shutdown.
+ */
+int orderly_poweroff(bool force)
+{
+ int ret = __orderly_poweroff();
+
+ if (ret && force) {
printk(KERN_WARNING "Failed to start orderly shutdown: "
"forcing the issue\n");
- /* I guess this should try to kick off some daemon to
- sync and poweroff asap. Or not even bother syncing
- if we're doing an emergency shutdown? */
+ /*
+ * I guess this should try to kick off some daemon to sync and
+ * poweroff asap. Or not even bother syncing if we're doing an
+ * emergency shutdown?
+ */
emergency_sync();
kernel_power_off();
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4ab11879aeb..87174ef5916 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -30,6 +30,7 @@
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/kmemcheck.h>
+#include <linux/kmemleak.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -174,6 +175,11 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses it's own private copy */
static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
@@ -410,7 +416,7 @@ static struct ctl_table kern_table[] = {
.data = core_pattern,
.maxlen = CORENAME_MAX_SIZE,
.mode = 0644,
- .proc_handler = proc_dostring,
+ .proc_handler = proc_dostring_coredump,
},
{
.procname = "core_pipe_limit",
@@ -1095,11 +1101,9 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
},
{
- .procname = "nr_pdflush_threads",
- .data = &nr_pdflush_threads,
- .maxlen = sizeof nr_pdflush_threads,
- .mode = 0444 /* read-only*/,
- .proc_handler = proc_dointvec,
+ .procname = "nr_pdflush_threads",
+ .mode = 0444 /* read-only */,
+ .proc_handler = pdflush_proc_obsolete,
},
{
.procname = "swappiness",
@@ -1494,11 +1498,29 @@ static struct ctl_table fs_table[] = {
#endif
#endif
{
+ .procname = "protected_symlinks",
+ .data = &sysctl_protected_symlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "protected_hardlinks",
+ .data = &sysctl_protected_hardlinks,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
.procname = "suid_dumpable",
.data = &suid_dumpable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax_coredump,
.extra1 = &zero,
.extra2 = &two,
},
@@ -1551,7 +1573,10 @@ static struct ctl_table dev_table[] = {
int __init sysctl_init(void)
{
- register_sysctl_table(sysctl_base_table);
+ struct ctl_table_header *hdr;
+
+ hdr = register_sysctl_table(sysctl_base_table);
+ kmemleak_not_leak(hdr);
return 0;
}
@@ -2009,6 +2034,34 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
do_proc_dointvec_minmax_conv, &param);
}
+static void validate_coredump_safety(void)
+{
+ if (suid_dumpable == SUID_DUMPABLE_SAFE &&
+ core_pattern[0] != '/' && core_pattern[0] != '|') {
+ printk(KERN_WARNING "Unsafe core_pattern used with "\
+ "suid_dumpable=2. Pipe handler or fully qualified "\
+ "core dump path required.\n");
+ }
+}
+
+static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
+static int proc_dostring_coredump(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int error = proc_dostring(table, write, buffer, lenp, ppos);
+ if (!error)
+ validate_coredump_safety();
+ return error;
+}
+
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index a650694883a..65bdcf198d4 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -147,7 +147,7 @@ static const struct bin_table bin_vm_table[] = {
{ CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" },
/* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */
/* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */
- { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" },
+ /* VM_NR_PDFLUSH_THREADS "nr_pdflush_threads" no longer used */
{ CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" },
/* VM_PAGEBUF unused */
/* VM_HUGETLB_PAGES "nr_hugepages" no longer used */
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 91d4e1742a0..d320d44903b 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -75,6 +75,7 @@ void task_work_run(void)
p = q->next;
q->func(q);
q = p;
+ cond_resched();
}
}
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e66046456f4..d0a32796550 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -436,6 +436,11 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
sizeof(struct cgroupstats));
+ if (na == NULL) {
+ rc = -EMSGSIZE;
+ goto err;
+ }
+
stats = nla_data(na);
memset(stats, 0, sizeof(*stats));
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a470154e040..46da0537c10 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
* requested HZ value. It is also not recommended
* for "tick-less" systems.
*/
-#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
+#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
* conversion, the .shift value could be zero. However
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b7fbadc5c97..24174b4d669 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -28,7 +28,7 @@ DEFINE_SPINLOCK(ntp_lock);
/* USER_HZ period (usecs): */
unsigned long tick_usec = TICK_USEC;
-/* ACTHZ period (nsecs): */
+/* SHIFTED_HZ period (nsecs): */
unsigned long tick_nsec;
static u64 tick_length;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f045cc50832..e16af197a2b 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -65,14 +65,14 @@ struct timekeeper {
* used instead.
*/
struct timespec wall_to_monotonic;
- /* time spent in suspend */
- struct timespec total_sleep_time;
- /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
- struct timespec raw_time;
/* Offset clock monotonic -> clock realtime */
ktime_t offs_real;
+ /* time spent in suspend */
+ struct timespec total_sleep_time;
/* Offset clock monotonic -> clock boottime */
ktime_t offs_boot;
+ /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
+ struct timespec raw_time;
/* Seqlock for all timekeeper values */
seqlock_t lock;
};
@@ -108,13 +108,38 @@ static struct timespec tk_xtime(struct timekeeper *tk)
static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
{
tk->xtime_sec = ts->tv_sec;
- tk->xtime_nsec = ts->tv_nsec << tk->shift;
+ tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
}
static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
{
tk->xtime_sec += ts->tv_sec;
- tk->xtime_nsec += ts->tv_nsec << tk->shift;
+ tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
+}
+
+static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
+{
+ struct timespec tmp;
+
+ /*
+ * Verify consistency of: offset_real = -wall_to_monotonic
+ * before modifying anything
+ */
+ set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec,
+ -tk->wall_to_monotonic.tv_nsec);
+ WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64);
+ tk->wall_to_monotonic = wtm;
+ set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
+ tk->offs_real = timespec_to_ktime(tmp);
+}
+
+static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
+{
+ /* Verify consistency before modifying */
+ WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64);
+
+ tk->total_sleep_time = t;
+ tk->offs_boot = timespec_to_ktime(t);
}
/**
@@ -217,14 +242,6 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
return nsec + arch_gettimeoffset();
}
-static void update_rt_offset(struct timekeeper *tk)
-{
- struct timespec tmp, *wtm = &tk->wall_to_monotonic;
-
- set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
- tk->offs_real = timespec_to_ktime(tmp);
-}
-
/* must hold write on timekeeper.lock */
static void timekeeping_update(struct timekeeper *tk, bool clearntp)
{
@@ -234,12 +251,10 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
tk->ntp_error = 0;
ntp_clear();
}
- update_rt_offset(tk);
xt = tk_xtime(tk);
update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
}
-
/**
* timekeeping_forward_now - update clock to the current time
*
@@ -277,18 +292,19 @@ static void timekeeping_forward_now(struct timekeeper *tk)
*/
void getnstimeofday(struct timespec *ts)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
s64 nsecs = 0;
WARN_ON(timekeeping_suspended);
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- ts->tv_sec = timekeeper.xtime_sec;
- ts->tv_nsec = timekeeping_get_ns(&timekeeper);
+ ts->tv_sec = tk->xtime_sec;
+ ts->tv_nsec = timekeeping_get_ns(tk);
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
timespec_add_ns(ts, nsecs);
}
@@ -296,19 +312,18 @@ EXPORT_SYMBOL(getnstimeofday);
ktime_t ktime_get(void)
{
+ struct timekeeper *tk = &timekeeper;
unsigned int seq;
s64 secs, nsecs;
WARN_ON(timekeeping_suspended);
do {
- seq = read_seqbegin(&timekeeper.lock);
- secs = timekeeper.xtime_sec +
- timekeeper.wall_to_monotonic.tv_sec;
- nsecs = timekeeping_get_ns(&timekeeper) +
- timekeeper.wall_to_monotonic.tv_nsec;
+ seq = read_seqbegin(&tk->lock);
+ secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
/*
* Use ktime_set/ktime_add_ns to create a proper ktime on
* 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -327,18 +342,19 @@ EXPORT_SYMBOL_GPL(ktime_get);
*/
void ktime_get_ts(struct timespec *ts)
{
+ struct timekeeper *tk = &timekeeper;
struct timespec tomono;
unsigned int seq;
WARN_ON(timekeeping_suspended);
do {
- seq = read_seqbegin(&timekeeper.lock);
- ts->tv_sec = timekeeper.xtime_sec;
- ts->tv_nsec = timekeeping_get_ns(&timekeeper);
- tomono = timekeeper.wall_to_monotonic;
+ seq = read_seqbegin(&tk->lock);
+ ts->tv_sec = tk->xtime_sec;
+ ts->tv_nsec = timekeeping_get_ns(tk);
+ tomono = tk->wall_to_monotonic;
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
ts->tv_nsec + tomono.tv_nsec);
@@ -358,22 +374,23 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
*/
void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
s64 nsecs_raw, nsecs_real;
WARN_ON_ONCE(timekeeping_suspended);
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- *ts_raw = timekeeper.raw_time;
- ts_real->tv_sec = timekeeper.xtime_sec;
+ *ts_raw = tk->raw_time;
+ ts_real->tv_sec = tk->xtime_sec;
ts_real->tv_nsec = 0;
- nsecs_raw = timekeeping_get_ns_raw(&timekeeper);
- nsecs_real = timekeeping_get_ns(&timekeeper);
+ nsecs_raw = timekeeping_get_ns_raw(tk);
+ nsecs_real = timekeeping_get_ns(tk);
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
timespec_add_ns(ts_raw, nsecs_raw);
timespec_add_ns(ts_real, nsecs_real);
@@ -406,28 +423,28 @@ EXPORT_SYMBOL(do_gettimeofday);
*/
int do_settimeofday(const struct timespec *tv)
{
+ struct timekeeper *tk = &timekeeper;
struct timespec ts_delta, xt;
unsigned long flags;
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
- timekeeping_forward_now(&timekeeper);
+ timekeeping_forward_now(tk);
- xt = tk_xtime(&timekeeper);
+ xt = tk_xtime(tk);
ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
- timekeeper.wall_to_monotonic =
- timespec_sub(timekeeper.wall_to_monotonic, ts_delta);
+ tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta));
- tk_set_xtime(&timekeeper, tv);
+ tk_set_xtime(tk, tv);
- timekeeping_update(&timekeeper, true);
+ timekeeping_update(tk, true);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ write_sequnlock_irqrestore(&tk->lock, flags);
/* signal hrtimers about time change */
clock_was_set();
@@ -436,7 +453,6 @@ int do_settimeofday(const struct timespec *tv)
}
EXPORT_SYMBOL(do_settimeofday);
-
/**
* timekeeping_inject_offset - Adds or subtracts from the current time.
* @tv: pointer to the timespec variable containing the offset
@@ -445,23 +461,23 @@ EXPORT_SYMBOL(do_settimeofday);
*/
int timekeeping_inject_offset(struct timespec *ts)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long flags;
if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
- timekeeping_forward_now(&timekeeper);
+ timekeeping_forward_now(tk);
- tk_xtime_add(&timekeeper, ts);
- timekeeper.wall_to_monotonic =
- timespec_sub(timekeeper.wall_to_monotonic, *ts);
+ tk_xtime_add(tk, ts);
+ tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
- timekeeping_update(&timekeeper, true);
+ timekeeping_update(tk, true);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ write_sequnlock_irqrestore(&tk->lock, flags);
/* signal hrtimers about time change */
clock_was_set();
@@ -477,23 +493,24 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
*/
static int change_clocksource(void *data)
{
+ struct timekeeper *tk = &timekeeper;
struct clocksource *new, *old;
unsigned long flags;
new = (struct clocksource *) data;
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
- timekeeping_forward_now(&timekeeper);
+ timekeeping_forward_now(tk);
if (!new->enable || new->enable(new) == 0) {
- old = timekeeper.clock;
- tk_setup_internals(&timekeeper, new);
+ old = tk->clock;
+ tk_setup_internals(tk, new);
if (old->disable)
old->disable(old);
}
- timekeeping_update(&timekeeper, true);
+ timekeeping_update(tk, true);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ write_sequnlock_irqrestore(&tk->lock, flags);
return 0;
}
@@ -507,7 +524,9 @@ static int change_clocksource(void *data)
*/
void timekeeping_notify(struct clocksource *clock)
{
- if (timekeeper.clock == clock)
+ struct timekeeper *tk = &timekeeper;
+
+ if (tk->clock == clock)
return;
stop_machine(change_clocksource, clock, NULL);
tick_clock_notify();
@@ -536,35 +555,36 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
*/
void getrawmonotonic(struct timespec *ts)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
s64 nsecs;
do {
- seq = read_seqbegin(&timekeeper.lock);
- nsecs = timekeeping_get_ns_raw(&timekeeper);
- *ts = timekeeper.raw_time;
+ seq = read_seqbegin(&tk->lock);
+ nsecs = timekeeping_get_ns_raw(tk);
+ *ts = tk->raw_time;
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
timespec_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(getrawmonotonic);
-
/**
* timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
*/
int timekeeping_valid_for_hres(void)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
int ret;
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+ ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
return ret;
}
@@ -574,15 +594,16 @@ int timekeeping_valid_for_hres(void)
*/
u64 timekeeping_max_deferment(void)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
u64 ret;
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- ret = timekeeper.clock->max_idle_ns;
+ ret = tk->clock->max_idle_ns;
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
return ret;
}
@@ -622,46 +643,43 @@ void __attribute__((weak)) read_boot_clock(struct timespec *ts)
*/
void __init timekeeping_init(void)
{
+ struct timekeeper *tk = &timekeeper;
struct clocksource *clock;
unsigned long flags;
- struct timespec now, boot;
+ struct timespec now, boot, tmp;
read_persistent_clock(&now);
read_boot_clock(&boot);
- seqlock_init(&timekeeper.lock);
+ seqlock_init(&tk->lock);
ntp_init();
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
clock = clocksource_default_clock();
if (clock->enable)
clock->enable(clock);
- tk_setup_internals(&timekeeper, clock);
+ tk_setup_internals(tk, clock);
- tk_set_xtime(&timekeeper, &now);
- timekeeper.raw_time.tv_sec = 0;
- timekeeper.raw_time.tv_nsec = 0;
+ tk_set_xtime(tk, &now);
+ tk->raw_time.tv_sec = 0;
+ tk->raw_time.tv_nsec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
- boot = tk_xtime(&timekeeper);
-
- set_normalized_timespec(&timekeeper.wall_to_monotonic,
- -boot.tv_sec, -boot.tv_nsec);
- update_rt_offset(&timekeeper);
- timekeeper.total_sleep_time.tv_sec = 0;
- timekeeper.total_sleep_time.tv_nsec = 0;
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ boot = tk_xtime(tk);
+
+ set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec);
+ tk_set_wall_to_mono(tk, tmp);
+
+ tmp.tv_sec = 0;
+ tmp.tv_nsec = 0;
+ tk_set_sleep_time(tk, tmp);
+
+ write_sequnlock_irqrestore(&tk->lock, flags);
}
/* time in seconds when suspend began */
static struct timespec timekeeping_suspend_time;
-static void update_sleep_time(struct timespec t)
-{
- timekeeper.total_sleep_time = t;
- timekeeper.offs_boot = timespec_to_ktime(t);
-}
-
/**
* __timekeeping_inject_sleeptime - Internal function to add sleep interval
* @delta: pointer to a timespec delta value
@@ -677,13 +695,11 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
"sleep delta value!\n");
return;
}
-
tk_xtime_add(tk, delta);
- tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta);
- update_sleep_time(timespec_add(tk->total_sleep_time, *delta));
+ tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
+ tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
}
-
/**
* timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values
* @delta: pointer to a timespec delta value
@@ -696,6 +712,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
*/
void timekeeping_inject_sleeptime(struct timespec *delta)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long flags;
struct timespec ts;
@@ -704,21 +721,20 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
return;
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
- timekeeping_forward_now(&timekeeper);
+ timekeeping_forward_now(tk);
- __timekeeping_inject_sleeptime(&timekeeper, delta);
+ __timekeeping_inject_sleeptime(tk, delta);
- timekeeping_update(&timekeeper, true);
+ timekeeping_update(tk, true);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ write_sequnlock_irqrestore(&tk->lock, flags);
/* signal hrtimers about time change */
clock_was_set();
}
-
/**
* timekeeping_resume - Resumes the generic timekeeping subsystem.
*
@@ -728,6 +744,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
*/
static void timekeeping_resume(void)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long flags;
struct timespec ts;
@@ -735,18 +752,18 @@ static void timekeeping_resume(void)
clocksource_resume();
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
ts = timespec_sub(ts, timekeeping_suspend_time);
- __timekeeping_inject_sleeptime(&timekeeper, &ts);
+ __timekeeping_inject_sleeptime(tk, &ts);
}
/* re-base the last cycle value */
- timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
- timekeeper.ntp_error = 0;
+ tk->clock->cycle_last = tk->clock->read(tk->clock);
+ tk->ntp_error = 0;
timekeeping_suspended = 0;
- timekeeping_update(&timekeeper, false);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ timekeeping_update(tk, false);
+ write_sequnlock_irqrestore(&tk->lock, flags);
touch_softlockup_watchdog();
@@ -758,14 +775,15 @@ static void timekeeping_resume(void)
static int timekeeping_suspend(void)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long flags;
struct timespec delta, delta_delta;
static struct timespec old_delta;
read_persistent_clock(&timekeeping_suspend_time);
- write_seqlock_irqsave(&timekeeper.lock, flags);
- timekeeping_forward_now(&timekeeper);
+ write_seqlock_irqsave(&tk->lock, flags);
+ timekeeping_forward_now(tk);
timekeeping_suspended = 1;
/*
@@ -774,7 +792,7 @@ static int timekeeping_suspend(void)
* try to compensate so the difference in system time
* and persistent_clock time stays close to constant.
*/
- delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time);
+ delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time);
delta_delta = timespec_sub(delta, old_delta);
if (abs(delta_delta.tv_sec) >= 2) {
/*
@@ -787,7 +805,7 @@ static int timekeeping_suspend(void)
timekeeping_suspend_time =
timespec_add(timekeeping_suspend_time, delta_delta);
}
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ write_sequnlock_irqrestore(&tk->lock, flags);
clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
clocksource_suspend();
@@ -898,27 +916,29 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
* the error. This causes the likely below to be unlikely.
*
* The proper fix is to avoid rounding up by using
- * the high precision timekeeper.xtime_nsec instead of
+ * the high precision tk->xtime_nsec instead of
* xtime.tv_nsec everywhere. Fixing this will take some
* time.
*/
if (likely(error <= interval))
adj = 1;
else
- adj = timekeeping_bigadjust(tk, error, &interval,
- &offset);
- } else if (error < -interval) {
- /* See comment above, this is just switched for the negative */
- error >>= 2;
- if (likely(error >= -interval)) {
- adj = -1;
- interval = -interval;
- offset = -offset;
- } else
- adj = timekeeping_bigadjust(tk, error, &interval,
- &offset);
- } else
- return;
+ adj = timekeeping_bigadjust(tk, error, &interval, &offset);
+ } else {
+ if (error < -interval) {
+ /* See comment above, this is just switched for the negative */
+ error >>= 2;
+ if (likely(error >= -interval)) {
+ adj = -1;
+ interval = -interval;
+ offset = -offset;
+ } else {
+ adj = timekeeping_bigadjust(tk, error, &interval, &offset);
+ }
+ } else {
+ goto out_adjust;
+ }
+ }
if (unlikely(tk->clock->maxadj &&
(tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
@@ -981,6 +1001,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
tk->xtime_nsec -= offset;
tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
+out_adjust:
/*
* It may be possible that when we entered this function, xtime_nsec
* was very small. Further, if we're slightly speeding the clocksource
@@ -1003,7 +1024,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
}
-
/**
* accumulate_nsecs_to_secs - Accumulates nsecs into secs
*
@@ -1024,15 +1044,21 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
/* Figure out if its a leap sec and apply if needed */
leap = second_overflow(tk->xtime_sec);
- tk->xtime_sec += leap;
- tk->wall_to_monotonic.tv_sec -= leap;
- if (leap)
- clock_was_set_delayed();
+ if (unlikely(leap)) {
+ struct timespec ts;
+
+ tk->xtime_sec += leap;
+
+ ts.tv_sec = leap;
+ ts.tv_nsec = 0;
+ tk_set_wall_to_mono(tk,
+ timespec_sub(tk->wall_to_monotonic, ts));
+ clock_was_set_delayed();
+ }
}
}
-
/**
* logarithmic_accumulation - shifted accumulation of cycles
*
@@ -1076,7 +1102,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
return offset;
}
-
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
@@ -1084,21 +1109,22 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
static void update_wall_time(void)
{
struct clocksource *clock;
+ struct timekeeper *tk = &timekeeper;
cycle_t offset;
int shift = 0, maxshift;
unsigned long flags;
s64 remainder;
- write_seqlock_irqsave(&timekeeper.lock, flags);
+ write_seqlock_irqsave(&tk->lock, flags);
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
goto out;
- clock = timekeeper.clock;
+ clock = tk->clock;
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
- offset = timekeeper.cycle_interval;
+ offset = tk->cycle_interval;
#else
offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
#endif
@@ -1111,19 +1137,19 @@ static void update_wall_time(void)
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
- shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+ shift = ilog2(offset) - ilog2(tk->cycle_interval);
shift = max(0, shift);
/* Bound shift to one less than what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
- while (offset >= timekeeper.cycle_interval) {
- offset = logarithmic_accumulation(&timekeeper, offset, shift);
- if(offset < timekeeper.cycle_interval<<shift)
+ while (offset >= tk->cycle_interval) {
+ offset = logarithmic_accumulation(tk, offset, shift);
+ if (offset < tk->cycle_interval<<shift)
shift--;
}
/* correct the clock when NTP error is too big */
- timekeeping_adjust(&timekeeper, offset);
+ timekeeping_adjust(tk, offset);
/*
@@ -1135,21 +1161,21 @@ static void update_wall_time(void)
* the vsyscall implementations are converted to use xtime_nsec
* (shifted nanoseconds), this can be killed.
*/
- remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1);
- timekeeper.xtime_nsec -= remainder;
- timekeeper.xtime_nsec += 1 << timekeeper.shift;
- timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift;
+ remainder = tk->xtime_nsec & ((1 << tk->shift) - 1);
+ tk->xtime_nsec -= remainder;
+ tk->xtime_nsec += 1 << tk->shift;
+ tk->ntp_error += remainder << tk->ntp_error_shift;
/*
* Finally, make sure that after the rounding
* xtime_nsec isn't larger than NSEC_PER_SEC
*/
- accumulate_nsecs_to_secs(&timekeeper);
+ accumulate_nsecs_to_secs(tk);
- timekeeping_update(&timekeeper, false);
+ timekeeping_update(tk, false);
out:
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
+ write_sequnlock_irqrestore(&tk->lock, flags);
}
@@ -1166,18 +1192,18 @@ out:
*/
void getboottime(struct timespec *ts)
{
+ struct timekeeper *tk = &timekeeper;
struct timespec boottime = {
- .tv_sec = timekeeper.wall_to_monotonic.tv_sec +
- timekeeper.total_sleep_time.tv_sec,
- .tv_nsec = timekeeper.wall_to_monotonic.tv_nsec +
- timekeeper.total_sleep_time.tv_nsec
+ .tv_sec = tk->wall_to_monotonic.tv_sec +
+ tk->total_sleep_time.tv_sec,
+ .tv_nsec = tk->wall_to_monotonic.tv_nsec +
+ tk->total_sleep_time.tv_nsec
};
set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
}
EXPORT_SYMBOL_GPL(getboottime);
-
/**
* get_monotonic_boottime - Returns monotonic time since boot
* @ts: pointer to the timespec to be set
@@ -1189,19 +1215,20 @@ EXPORT_SYMBOL_GPL(getboottime);
*/
void get_monotonic_boottime(struct timespec *ts)
{
+ struct timekeeper *tk = &timekeeper;
struct timespec tomono, sleep;
unsigned int seq;
WARN_ON(timekeeping_suspended);
do {
- seq = read_seqbegin(&timekeeper.lock);
- ts->tv_sec = timekeeper.xtime_sec;
- ts->tv_nsec = timekeeping_get_ns(&timekeeper);
- tomono = timekeeper.wall_to_monotonic;
- sleep = timekeeper.total_sleep_time;
+ seq = read_seqbegin(&tk->lock);
+ ts->tv_sec = tk->xtime_sec;
+ ts->tv_nsec = timekeeping_get_ns(tk);
+ tomono = tk->wall_to_monotonic;
+ sleep = tk->total_sleep_time;
- } while (read_seqretry(&timekeeper.lock, seq));
+ } while (read_seqretry(&tk->lock, seq));
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec);
@@ -1231,31 +1258,38 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime);
*/
void monotonic_to_bootbased(struct timespec *ts)
{
- *ts = timespec_add(*ts, timekeeper.total_sleep_time);
+ struct timekeeper *tk = &timekeeper;
+
+ *ts = timespec_add(*ts, tk->total_sleep_time);
}
EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
unsigned long get_seconds(void)
{
- return timekeeper.xtime_sec;
+ struct timekeeper *tk = &timekeeper;
+
+ return tk->xtime_sec;
}
EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void)
{
- return tk_xtime(&timekeeper);
+ struct timekeeper *tk = &timekeeper;
+
+ return tk_xtime(tk);
}
struct timespec current_kernel_time(void)
{
+ struct timekeeper *tk = &timekeeper;
struct timespec now;
unsigned long seq;
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- now = tk_xtime(&timekeeper);
- } while (read_seqretry(&timekeeper.lock, seq));
+ now = tk_xtime(tk);
+ } while (read_seqretry(&tk->lock, seq));
return now;
}
@@ -1263,15 +1297,16 @@ EXPORT_SYMBOL(current_kernel_time);
struct timespec get_monotonic_coarse(void)
{
+ struct timekeeper *tk = &timekeeper;
struct timespec now, mono;
unsigned long seq;
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- now = tk_xtime(&timekeeper);
- mono = timekeeper.wall_to_monotonic;
- } while (read_seqretry(&timekeeper.lock, seq));
+ now = tk_xtime(tk);
+ mono = tk->wall_to_monotonic;
+ } while (read_seqretry(&tk->lock, seq));
set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
now.tv_nsec + mono.tv_nsec);
@@ -1300,14 +1335,15 @@ void do_timer(unsigned long ticks)
void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
struct timespec *wtom, struct timespec *sleep)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
do {
- seq = read_seqbegin(&timekeeper.lock);
- *xtim = tk_xtime(&timekeeper);
- *wtom = timekeeper.wall_to_monotonic;
- *sleep = timekeeper.total_sleep_time;
- } while (read_seqretry(&timekeeper.lock, seq));
+ seq = read_seqbegin(&tk->lock);
+ *xtim = tk_xtime(tk);
+ *wtom = tk->wall_to_monotonic;
+ *sleep = tk->total_sleep_time;
+ } while (read_seqretry(&tk->lock, seq));
}
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1321,19 +1357,20 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
*/
ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
{
+ struct timekeeper *tk = &timekeeper;
ktime_t now;
unsigned int seq;
u64 secs, nsecs;
do {
- seq = read_seqbegin(&timekeeper.lock);
+ seq = read_seqbegin(&tk->lock);
- secs = timekeeper.xtime_sec;
- nsecs = timekeeping_get_ns(&timekeeper);
+ secs = tk->xtime_sec;
+ nsecs = timekeeping_get_ns(tk);
- *offs_real = timekeeper.offs_real;
- *offs_boot = timekeeper.offs_boot;
- } while (read_seqretry(&timekeeper.lock, seq));
+ *offs_real = tk->offs_real;
+ *offs_boot = tk->offs_boot;
+ } while (read_seqretry(&tk->lock, seq));
now = ktime_add_ns(ktime_set(secs, 0), nsecs);
now = ktime_sub(now, *offs_real);
@@ -1346,19 +1383,19 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
*/
ktime_t ktime_get_monotonic_offset(void)
{
+ struct timekeeper *tk = &timekeeper;
unsigned long seq;
struct timespec wtom;
do {
- seq = read_seqbegin(&timekeeper.lock);
- wtom = timekeeper.wall_to_monotonic;
- } while (read_seqretry(&timekeeper.lock, seq));
+ seq = read_seqbegin(&tk->lock);
+ wtom = tk->wall_to_monotonic;
+ } while (read_seqretry(&tk->lock, seq));
return timespec_to_ktime(wtom);
}
EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
-
/**
* xtime_update() - advances the timekeeping infrastructure
* @ticks: number of ticks, that have elapsed since the last call.
diff --git a/kernel/timer.c b/kernel/timer.c
index a61c09374eb..8c5e7b908c6 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
#endif
-#ifndef __alpha__
-
-/*
- * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
- * should be moved into arch/i386 instead?
- */
-
/**
* sys_getpid - return the thread group id of the current process
*
@@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid)
return from_kgid_munged(current_user_ns(), current_egid());
}
-#endif
-
static void process_timeout(unsigned long __data)
{
wake_up_process((struct task_struct *)__data);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index fee3752ae8f..8a6d2ee2086 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
head = this_cpu_ptr(event_function.perf_events);
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
- 1, &regs, head);
+ 1, &regs, head, NULL);
#undef ENTRY_SIZE
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b31d3d5699f..1a2117043bb 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events);
- perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
+ perf_trace_buf_submit(entry, size, rctx,
+ entry->ip, 1, regs, head, NULL);
}
/* Kretprobe profile handler */
@@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
head = this_cpu_ptr(call->perf_events);
- perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
+ perf_trace_buf_submit(entry, size, rctx,
+ entry->ret_ip, 1, regs, head, NULL);
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 96fc7336909..60e4d787567 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -532,7 +532,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
(unsigned long *)&rec->args);
head = this_cpu_ptr(sys_data->enter_event->perf_events);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
int perf_sysenter_enable(struct ftrace_event_call *call)
@@ -608,7 +608,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->ret = syscall_get_return_value(current, regs);
head = this_cpu_ptr(sys_data->exit_event->perf_events);
- perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
+ perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
int perf_sysexit_enable(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 2b36ac68549..03003cd7dd9 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
head = this_cpu_ptr(call->perf_events);
- perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
+ perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
out:
preempt_enable();