summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/async.c2
-rw-r--r--kernel/audit.c4
-rw-r--r--kernel/capability.c80
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/irq/internals.h2
-rw-r--r--kernel/irq/spurious.c2
-rw-r--r--kernel/kexec.c25
-rw-r--r--kernel/kprobes.c2
-rw-r--r--kernel/module.c205
-rw-r--r--kernel/panic.c26
-rw-r--r--kernel/params.c38
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/pid_namespace.c31
-rw-r--r--kernel/power/swap.c13
-rw-r--r--kernel/printk.c10
-rw-r--r--kernel/ptrace.c14
-rw-r--r--kernel/sched/core.c9
-rw-r--r--kernel/sched/fair.c10
-rw-r--r--kernel/sys.c121
-rw-r--r--kernel/trace/ftrace.c715
-rw-r--r--kernel/trace/trace_events_filter.c283
-rw-r--r--kernel/trace/trace_stack.c30
25 files changed, 1109 insertions, 536 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f70396e5a24..2d9de86b7e7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,7 @@ CFLAGS_REMOVE_irq_work.o = -pg
endif
obj-y += sched/
+obj-y += power/
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
@@ -52,8 +53,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
-obj-$(CONFIG_PM) += power/
-obj-$(CONFIG_FREEZER) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/async.c b/kernel/async.c
index 80b74b88fef..bd0c168a3bb 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -78,8 +78,6 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done);
static atomic_t entry_count;
-extern int initcall_debug;
-
/*
* MUST be called with the lock held!
diff --git a/kernel/audit.c b/kernel/audit.c
index 705c25a70bf..bb0eb5bb9a0 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -601,13 +601,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
case AUDIT_TTY_SET:
case AUDIT_TRIM:
case AUDIT_MAKE_EQUIV:
- if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
+ if (!capable(CAP_AUDIT_CONTROL))
err = -EPERM;
break;
case AUDIT_USER:
case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
- if (security_netlink_recv(skb, CAP_AUDIT_WRITE))
+ if (!capable(CAP_AUDIT_WRITE))
err = -EPERM;
break;
default: /* bad msg */
diff --git a/kernel/capability.c b/kernel/capability.c
index b463871a4e6..3f1adb6c647 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -287,74 +287,84 @@ error:
}
/**
- * has_capability - Does a task have a capability in init_user_ns
+ * has_ns_capability - Does a task have a capability in a specific user ns
* @t: The task in question
+ * @ns: target user namespace
* @cap: The capability to be tested for
*
* Return true if the specified task has the given superior capability
- * currently in effect to the initial user namespace, false if not.
+ * currently in effect to the specified user namespace, false if not.
*
* Note that this does not set PF_SUPERPRIV on the task.
*/
-bool has_capability(struct task_struct *t, int cap)
+bool has_ns_capability(struct task_struct *t,
+ struct user_namespace *ns, int cap)
{
- int ret = security_real_capable(t, &init_user_ns, cap);
+ int ret;
+
+ rcu_read_lock();
+ ret = security_capable(__task_cred(t), ns, cap);
+ rcu_read_unlock();
return (ret == 0);
}
/**
- * has_capability - Does a task have a capability in a specific user ns
+ * has_capability - Does a task have a capability in init_user_ns
* @t: The task in question
- * @ns: target user namespace
* @cap: The capability to be tested for
*
* Return true if the specified task has the given superior capability
- * currently in effect to the specified user namespace, false if not.
+ * currently in effect to the initial user namespace, false if not.
*
* Note that this does not set PF_SUPERPRIV on the task.
*/
-bool has_ns_capability(struct task_struct *t,
- struct user_namespace *ns, int cap)
+bool has_capability(struct task_struct *t, int cap)
{
- int ret = security_real_capable(t, ns, cap);
-
- return (ret == 0);
+ return has_ns_capability(t, &init_user_ns, cap);
}
/**
- * has_capability_noaudit - Does a task have a capability (unaudited)
+ * has_ns_capability_noaudit - Does a task have a capability (unaudited)
+ * in a specific user ns.
* @t: The task in question
+ * @ns: target user namespace
* @cap: The capability to be tested for
*
* Return true if the specified task has the given superior capability
- * currently in effect to init_user_ns, false if not. Don't write an
- * audit message for the check.
+ * currently in effect to the specified user namespace, false if not.
+ * Do not write an audit message for the check.
*
* Note that this does not set PF_SUPERPRIV on the task.
*/
-bool has_capability_noaudit(struct task_struct *t, int cap)
+bool has_ns_capability_noaudit(struct task_struct *t,
+ struct user_namespace *ns, int cap)
{
- int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+ int ret;
+
+ rcu_read_lock();
+ ret = security_capable_noaudit(__task_cred(t), ns, cap);
+ rcu_read_unlock();
return (ret == 0);
}
/**
- * capable - Determine if the current task has a superior capability in effect
+ * has_capability_noaudit - Does a task have a capability (unaudited) in the
+ * initial user ns
+ * @t: The task in question
* @cap: The capability to be tested for
*
- * Return true if the current task has the given superior capability currently
- * available for use, false if not.
+ * Return true if the specified task has the given superior capability
+ * currently in effect to init_user_ns, false if not. Don't write an
+ * audit message for the check.
*
- * This sets PF_SUPERPRIV on the task if the capability is available on the
- * assumption that it's about to be used.
+ * Note that this does not set PF_SUPERPRIV on the task.
*/
-bool capable(int cap)
+bool has_capability_noaudit(struct task_struct *t, int cap)
{
- return ns_capable(&init_user_ns, cap);
+ return has_ns_capability_noaudit(t, &init_user_ns, cap);
}
-EXPORT_SYMBOL(capable);
/**
* ns_capable - Determine if the current task has a superior capability in effect
@@ -374,7 +384,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
BUG();
}
- if (security_capable(ns, current_cred(), cap) == 0) {
+ if (security_capable(current_cred(), ns, cap) == 0) {
current->flags |= PF_SUPERPRIV;
return true;
}
@@ -383,18 +393,20 @@ bool ns_capable(struct user_namespace *ns, int cap)
EXPORT_SYMBOL(ns_capable);
/**
- * task_ns_capable - Determine whether current task has a superior
- * capability targeted at a specific task's user namespace.
- * @t: The task whose user namespace is targeted.
- * @cap: The capability in question.
+ * capable - Determine if the current task has a superior capability in effect
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
*
- * Return true if it does, false otherwise.
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
*/
-bool task_ns_capable(struct task_struct *t, int cap)
+bool capable(int cap)
{
- return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
+ return ns_capable(&init_user_ns, cap);
}
-EXPORT_SYMBOL(task_ns_capable);
+EXPORT_SYMBOL(capable);
/**
* nsown_capable - Check superior capability to one's own user_ns
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 63786e71a3c..e2ae7349437 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1982,7 +1982,7 @@ static int kdb_lsmod(int argc, const char **argv)
kdb_printf("%-20s%8u 0x%p ", mod->name,
mod->core_size, (void *)mod);
#ifdef CONFIG_MODULE_UNLOAD
- kdb_printf("%4d ", module_refcount(mod));
+ kdb_printf("%4ld ", module_refcount(mod));
#endif
if (mod->state == MODULE_STATE_GOING)
kdb_printf(" (Unloading)");
diff --git a/kernel/exit.c b/kernel/exit.c
index 88dcbbc446f..294b1709170 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -887,7 +887,7 @@ static void check_stack_usage(void)
static inline void check_stack_usage(void) {}
#endif
-NORET_TYPE void do_exit(long code)
+void do_exit(long code)
{
struct task_struct *tsk = current;
int group_dead;
@@ -1050,7 +1050,7 @@ NORET_TYPE void do_exit(long code)
EXPORT_SYMBOL_GPL(do_exit);
-NORET_TYPE void complete_and_exit(struct completion *comp, long code)
+void complete_and_exit(struct completion *comp, long code)
{
if (comp)
complete(comp);
@@ -1069,7 +1069,7 @@ SYSCALL_DEFINE1(exit, int, error_code)
* Take down every thread in the group. This is called by fatal signals
* as well as by sys_exit_group (below).
*/
-NORET_TYPE void
+void
do_group_exit(int exit_code)
{
struct signal_struct *sig = current->signal;
diff --git a/kernel/fork.c b/kernel/fork.c
index c1e5c21f48c..051f090d40c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -873,6 +873,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
#ifdef CONFIG_BLOCK
struct io_context *ioc = current->io_context;
+ struct io_context *new_ioc;
if (!ioc)
return 0;
@@ -884,11 +885,12 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
if (unlikely(!tsk->io_context))
return -ENOMEM;
} else if (ioprio_valid(ioc->ioprio)) {
- tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
- if (unlikely(!tsk->io_context))
+ new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
+ if (unlikely(!new_ioc))
return -ENOMEM;
- tsk->io_context->ioprio = ioc->ioprio;
+ new_ioc->ioprio = ioc->ioprio;
+ put_io_context(new_ioc, NULL);
}
#endif
return 0;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index a73dd6c7372..b7952316016 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -15,7 +15,7 @@
#define istate core_internal_state__do_not_mess_with_it
-extern int noirqdebug;
+extern bool noirqdebug;
/*
* Bits used by threaded handlers:
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dc813a948be..611cd6003c4 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -325,7 +325,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
desc->irqs_unhandled = 0;
}
-int noirqdebug __read_mostly;
+bool noirqdebug __read_mostly;
int noirqdebug_setup(char *str)
{
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 090ee10d960..7b088678670 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,7 +32,6 @@
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
-#include <linux/kmsg_dump.h>
#include <linux/syscore_ops.h>
#include <asm/page.h>
@@ -1094,8 +1093,6 @@ void crash_kexec(struct pt_regs *regs)
if (kexec_crash_image) {
struct pt_regs fixed_regs;
- kmsg_dump(KMSG_DUMP_KEXEC);
-
crash_setup_regs(&fixed_regs, regs);
crash_save_vmcoreinfo();
machine_crash_shutdown(&fixed_regs);
@@ -1132,6 +1129,8 @@ int crash_shrink_memory(unsigned long new_size)
{
int ret = 0;
unsigned long start, end;
+ unsigned long old_size;
+ struct resource *ram_res;
mutex_lock(&kexec_mutex);
@@ -1141,11 +1140,15 @@ int crash_shrink_memory(unsigned long new_size)
}
start = crashk_res.start;
end = crashk_res.end;
+ old_size = (end == 0) ? 0 : end - start + 1;
+ if (new_size >= old_size) {
+ ret = (new_size == old_size) ? 0 : -EINVAL;
+ goto unlock;
+ }
- if (new_size >= end - start + 1) {
- ret = -EINVAL;
- if (new_size == end - start + 1)
- ret = 0;
+ ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
+ if (!ram_res) {
+ ret = -ENOMEM;
goto unlock;
}
@@ -1157,7 +1160,15 @@ int crash_shrink_memory(unsigned long new_size)
if ((start == end) && (crashk_res.parent != NULL))
release_resource(&crashk_res);
+
+ ram_res->start = end;
+ ram_res->end = crashk_res.end;
+ ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ ram_res->name = "System RAM";
+
crashk_res.end = end - 1;
+
+ insert_resource(&iomem_resource, ram_res);
crash_unmap_reserved_pages();
unlock:
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e5d84644823..95dd7212e61 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2198,7 +2198,7 @@ static ssize_t write_enabled_file_bool(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
- int buf_size;
+ size_t buf_size;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
diff --git a/kernel/module.c b/kernel/module.c
index 178333c48d1..2c932760fd3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -62,12 +62,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , a...)
-#endif
-
#ifndef ARCH_SHF_SMALL
#define ARCH_SHF_SMALL 0
#endif
@@ -138,7 +132,6 @@ struct load_info {
unsigned long len;
Elf_Shdr *sechdrs;
char *secstrings, *strtab;
- unsigned long *strmap;
unsigned long symoffs, stroffs;
struct _ddebug *debug;
unsigned int num_debug;
@@ -410,7 +403,7 @@ const struct kernel_symbol *find_symbol(const char *name,
return fsa.sym;
}
- DEBUGP("Failed to find symbol %s\n", name);
+ pr_debug("Failed to find symbol %s\n", name);
return NULL;
}
EXPORT_SYMBOL_GPL(find_symbol);
@@ -600,11 +593,11 @@ static int already_uses(struct module *a, struct module *b)
list_for_each_entry(use, &b->source_list, source_list) {
if (use->source == a) {
- DEBUGP("%s uses %s!\n", a->name, b->name);
+ pr_debug("%s uses %s!\n", a->name, b->name);
return 1;
}
}
- DEBUGP("%s does not use %s!\n", a->name, b->name);
+ pr_debug("%s does not use %s!\n", a->name, b->name);
return 0;
}
@@ -619,7 +612,7 @@ static int add_module_usage(struct module *a, struct module *b)
{
struct module_use *use;
- DEBUGP("Allocating new usage for %s.\n", a->name);
+ pr_debug("Allocating new usage for %s.\n", a->name);
use = kmalloc(sizeof(*use), GFP_ATOMIC);
if (!use) {
printk(KERN_WARNING "%s: out of memory loading\n", a->name);
@@ -663,7 +656,7 @@ static void module_unload_free(struct module *mod)
mutex_lock(&module_mutex);
list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) {
struct module *i = use->target;
- DEBUGP("%s unusing %s\n", mod->name, i->name);
+ pr_debug("%s unusing %s\n", mod->name, i->name);
module_put(i);
list_del(&use->source_list);
list_del(&use->target_list);
@@ -726,9 +719,9 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
}
}
-unsigned int module_refcount(struct module *mod)
+unsigned long module_refcount(struct module *mod)
{
- unsigned int incs = 0, decs = 0;
+ unsigned long incs = 0, decs = 0;
int cpu;
for_each_possible_cpu(cpu)
@@ -761,7 +754,7 @@ static void wait_for_zero_refcount(struct module *mod)
/* Since we might sleep for some time, release the mutex first */
mutex_unlock(&module_mutex);
for (;;) {
- DEBUGP("Looking at refcount...\n");
+ pr_debug("Looking at refcount...\n");
set_current_state(TASK_UNINTERRUPTIBLE);
if (module_refcount(mod) == 0)
break;
@@ -804,7 +797,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
if (mod->state != MODULE_STATE_LIVE) {
/* FIXME: if (force), slam module count and wake up
waiter --RR */
- DEBUGP("%s already dying\n", mod->name);
+ pr_debug("%s already dying\n", mod->name);
ret = -EBUSY;
goto out;
}
@@ -854,7 +847,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
struct module_use *use;
int printed_something = 0;
- seq_printf(m, " %u ", module_refcount(mod));
+ seq_printf(m, " %lu ", module_refcount(mod));
/* Always include a trailing , so userspace can differentiate
between this and the old multi-field proc format. */
@@ -904,13 +897,11 @@ EXPORT_SYMBOL_GPL(symbol_put_addr);
static ssize_t show_refcnt(struct module_attribute *mattr,
struct module_kobject *mk, char *buffer)
{
- return sprintf(buffer, "%u\n", module_refcount(mk->mod));
+ return sprintf(buffer, "%lu\n", module_refcount(mk->mod));
}
-static struct module_attribute refcnt = {
- .attr = { .name = "refcnt", .mode = 0444 },
- .show = show_refcnt,
-};
+static struct module_attribute modinfo_refcnt =
+ __ATTR(refcnt, 0444, show_refcnt, NULL);
void module_put(struct module *module)
{
@@ -951,6 +942,26 @@ static inline int module_unload_init(struct module *mod)
}
#endif /* CONFIG_MODULE_UNLOAD */
+static size_t module_flags_taint(struct module *mod, char *buf)
+{
+ size_t l = 0;
+
+ if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
+ buf[l++] = 'P';
+ if (mod->taints & (1 << TAINT_OOT_MODULE))
+ buf[l++] = 'O';
+ if (mod->taints & (1 << TAINT_FORCED_MODULE))
+ buf[l++] = 'F';
+ if (mod->taints & (1 << TAINT_CRAP))
+ buf[l++] = 'C';
+ /*
+ * TAINT_FORCED_RMMOD: could be added.
+ * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
+ * apply to modules.
+ */
+ return l;
+}
+
static ssize_t show_initstate(struct module_attribute *mattr,
struct module_kobject *mk, char *buffer)
{
@@ -970,10 +981,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
return sprintf(buffer, "%s\n", state);
}
-static struct module_attribute initstate = {
- .attr = { .name = "initstate", .mode = 0444 },
- .show = show_initstate,
-};
+static struct module_attribute modinfo_initstate =
+ __ATTR(initstate, 0444, show_initstate, NULL);
static ssize_t store_uevent(struct module_attribute *mattr,
struct module_kobject *mk,
@@ -986,18 +995,50 @@ static ssize_t store_uevent(struct module_attribute *mattr,
return count;
}
-struct module_attribute module_uevent = {
- .attr = { .name = "uevent", .mode = 0200 },
- .store = store_uevent,
-};
+struct module_attribute module_uevent =
+ __ATTR(uevent, 0200, NULL, store_uevent);
+
+static ssize_t show_coresize(struct module_attribute *mattr,
+ struct module_kobject *mk, char *buffer)
+{
+ return sprintf(buffer, "%u\n", mk->mod->core_size);
+}
+
+static struct module_attribute modinfo_coresize =
+ __ATTR(coresize, 0444, show_coresize, NULL);
+
+static ssize_t show_initsize(struct module_attribute *mattr,
+ struct module_kobject *mk, char *buffer)
+{
+ return sprintf(buffer, "%u\n", mk->mod->init_size);
+}
+
+static struct module_attribute modinfo_initsize =
+ __ATTR(initsize, 0444, show_initsize, NULL);
+
+static ssize_t show_taint(struct module_attribute *mattr,
+ struct module_kobject *mk, char *buffer)
+{
+ size_t l;
+
+ l = module_flags_taint(mk->mod, buffer);
+ buffer[l++] = '\n';
+ return l;
+}
+
+static struct module_attribute modinfo_taint =
+ __ATTR(taint, 0444, show_taint, NULL);
static struct module_attribute *modinfo_attrs[] = {
+ &module_uevent,
&modinfo_version,
&modinfo_srcversion,
- &initstate,
- &module_uevent,
+ &modinfo_initstate,
+ &modinfo_coresize,
+ &modinfo_initsize,
+ &modinfo_taint,
#ifdef CONFIG_MODULE_UNLOAD
- &refcnt,
+ &modinfo_refcnt,
#endif
NULL,
};
@@ -1057,7 +1098,7 @@ static int check_version(Elf_Shdr *sechdrs,
if (versions[i].crc == maybe_relocated(*crc, crc_owner))
return 1;
- DEBUGP("Found checksum %lX vs module %lX\n",
+ pr_debug("Found checksum %lX vs module %lX\n",
maybe_relocated(*crc, crc_owner), versions[i].crc);
goto bad_version;
}
@@ -1834,7 +1875,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
case SHN_COMMON:
/* We compiled with -fno-common. These are not
supposed to happen. */
- DEBUGP("Common symbol: %s\n", name);
+ pr_debug("Common symbol: %s\n", name);
printk("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
@@ -1842,7 +1883,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
case SHN_ABS:
/* Don't need to do anything */
- DEBUGP("Absolute symbol: 0x%08lx\n",
+ pr_debug("Absolute symbol: 0x%08lx\n",
(long)sym[i].st_value);
break;
@@ -1966,7 +2007,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
for (i = 0; i < info->hdr->e_shnum; i++)
info->sechdrs[i].sh_entsize = ~0UL;
- DEBUGP("Core section allocation order:\n");
+ pr_debug("Core section allocation order:\n");
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
@@ -1978,7 +2019,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
|| strstarts(sname, ".init"))
continue;
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
- DEBUGP("\t%s\n", name);
+ pr_debug("\t%s\n", sname);
}
switch (m) {
case 0: /* executable */
@@ -1995,7 +2036,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
}
}
- DEBUGP("Init section allocation order:\n");
+ pr_debug("Init section allocation order:\n");
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
@@ -2008,7 +2049,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
continue;
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
| INIT_OFFSET_MASK);
- DEBUGP("\t%s\n", sname);
+ pr_debug("\t%s\n", sname);
}
switch (m) {
case 0: /* executable */
@@ -2178,45 +2219,46 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
return true;
}
+/*
+ * We only allocate and copy the strings needed by the parts of symtab
+ * we keep. This is simple, but has the effect of making multiple
+ * copies of duplicates. We could be more sophisticated, see
+ * linux-kernel thread starting with
+ * <73defb5e4bca04a6431392cc341112b1@localhost>.
+ */
static void layout_symtab(struct module *mod, struct load_info *info)
{
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
- unsigned int i, nsrc, ndst;
+ unsigned int i, nsrc, ndst, strtab_size;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
- DEBUGP("\t%s\n", info->secstrings + symsect->sh_name);
+ pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
- for (ndst = i = 1; i < nsrc; ++i, ++src)
- if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
- unsigned int j = src->st_name;
- while (!__test_and_set_bit(j, info->strmap)
- && info->strtab[j])
- ++j;
- ++ndst;
+ /* Compute total space required for the core symbols' strtab. */
+ for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
+ if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
+ strtab_size += strlen(&info->strtab[src->st_name]) + 1;
+ ndst++;
}
/* Append room for core symbols at end of core part. */
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
- mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
+ info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
+ mod->core_size += strtab_size;
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
- DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
-
- /* Append room for core symbols' strings at end of core part. */
- info->stroffs = mod->core_size;
- __set_bit(0, info->strmap);
- mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
+ pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}
static void add_kallsyms(struct module *mod, const struct load_info *info)
@@ -2237,22 +2279,19 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
mod->core_symtab = dst = mod->module_core + info->symoffs;
+ mod->core_strtab = s = mod->module_core + info->stroffs;
src = mod->symtab;
*dst = *src;
+ *s++ = 0;
for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
continue;
+
dst[ndst] = *src;
- dst[ndst].st_name = bitmap_weight(info->strmap,
- dst[ndst].st_name);
- ++ndst;
+ dst[ndst++].st_name = s - mod->core_strtab;
+ s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
}
mod->core_num_syms = ndst;
-
- mod->core_strtab = s = mod->module_core + info->stroffs;
- for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i)
- if (test_bit(i, info->strmap))
- *++s = mod->strtab[i];
}
#else
static inline void layout_symtab(struct module *mod, struct load_info *info)
@@ -2621,7 +2660,7 @@ static int move_module(struct module *mod, struct load_info *info)
mod->module_init = ptr;
/* Transfer each section which specifies SHF_ALLOC */
- DEBUGP("final section addresses:\n");
+ pr_debug("final section addresses:\n");
for (i = 0; i < info->hdr->e_shnum; i++) {
void *dest;
Elf_Shdr *shdr = &info->sechdrs[i];
@@ -2639,8 +2678,8 @@ static int move_module(struct module *mod, struct load_info *info)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
/* Update sh_addr to point to copy in image. */
shdr->sh_addr = (unsigned long)dest;
- DEBUGP("\t0x%lx %s\n",
- shdr->sh_addr, info->secstrings + shdr->sh_name);
+ pr_debug("\t0x%lx %s\n",
+ (long)shdr->sh_addr, info->secstrings + shdr->sh_name);
}
return 0;
@@ -2742,27 +2781,18 @@ static struct module *layout_and_allocate(struct load_info *info)
this is done generically; there doesn't appear to be any
special cases for the architectures. */
layout_sections(mod, info);
-
- info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)
- * sizeof(long), GFP_KERNEL);
- if (!info->strmap) {
- err = -ENOMEM;
- goto free_percpu;
- }
layout_symtab(mod, info);
/* Allocate and move to the final place */
err = move_module(mod, info);
if (err)
- goto free_strmap;
+ goto free_percpu;
/* Module has been copied to its final place now: return it. */
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
kmemleak_load_module(mod, info);
return mod;
-free_strmap:
- kfree(info->strmap);
free_percpu:
percpu_modfree(mod);
out:
@@ -2772,7 +2802,6 @@ out:
/* mod is no longer valid after this! */
static void module_deallocate(struct module *mod, struct load_info *info)
{
- kfree(info->strmap);
percpu_modfree(mod);
module_free(mod, mod->module_init);
module_free(mod, mod->module_core);
@@ -2811,7 +2840,7 @@ static struct module *load_module(void __user *umod,
struct module *mod;
long err;
- DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
+ pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
umod, len, uargs);
/* Copy in the blobs from userspace, check they are vaguely sane. */
@@ -2902,8 +2931,7 @@ static struct module *load_module(void __user *umod,
if (err < 0)
goto unlink;
- /* Get rid of temporary copy and strmap. */
- kfree(info.strmap);
+ /* Get rid of temporary copy. */
free_copy(&info);
/* Done! */
@@ -3256,20 +3284,7 @@ static char *module_flags(struct module *mod, char *buf)
mod->state == MODULE_STATE_GOING ||
mod->state == MODULE_STATE_COMING) {
buf[bx++] = '(';
- if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
- buf[bx++] = 'P';
- else if (mod->taints & (1 << TAINT_OOT_MODULE))
- buf[bx++] = 'O';
- if (mod->taints & (1 << TAINT_FORCED_MODULE))
- buf[bx++] = 'F';
- if (mod->taints & (1 << TAINT_CRAP))
- buf[bx++] = 'C';
- /*
- * TAINT_FORCED_RMMOD: could be added.
- * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
- * apply to modules.
- */
-
+ bx += module_flags_taint(mod, buf + bx);
/* Show a - for module-is-being-unloaded */
if (mod->state == MODULE_STATE_GOING)
buf[bx++] = '-';
diff --git a/kernel/panic.c b/kernel/panic.c
index 3458469eb7c..80aed44e345 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -49,6 +49,15 @@ static long no_blink(int state)
long (*panic_blink)(int state);
EXPORT_SYMBOL(panic_blink);
+/*
+ * Stop ourself in panic -- architecture code may override this
+ */
+void __weak panic_smp_self_stop(void)
+{
+ while (1)
+ cpu_relax();
+}
+
/**
* panic - halt the system
* @fmt: The text string to print
@@ -57,8 +66,9 @@ EXPORT_SYMBOL(panic_blink);
*
* This function never returns.
*/
-NORET_TYPE void panic(const char * fmt, ...)
+void panic(const char *fmt, ...)
{
+ static DEFINE_SPINLOCK(panic_lock);
static char buf[1024];
va_list args;
long i, i_next = 0;
@@ -68,8 +78,14 @@ NORET_TYPE void panic(const char * fmt, ...)
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
+ *
+ * Only one CPU is allowed to execute the panic code from here. For
+ * multiple parallel invocations of panic, all other CPUs either
+ * stop themself or will wait until they are stopped by the 1st CPU
+ * with smp_send_stop().
*/
- preempt_disable();
+ if (!spin_trylock(&panic_lock))
+ panic_smp_self_stop();
console_verbose();
bust_spinlocks(1);
@@ -78,7 +94,11 @@ NORET_TYPE void panic(const char * fmt, ...)
va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
- dump_stack();
+ /*
+ * Avoid nested stack-dumping if a panic occurs during oops processing
+ */
+ if (!oops_in_progress)
+ dump_stack();
#endif
/*
diff --git a/kernel/params.c b/kernel/params.c
index 65aae11eb93..32ee0430828 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -25,12 +25,6 @@
#include <linux/slab.h>
#include <linux/ctype.h>
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt, a...)
-#endif
-
/* Protects all parameters, and incidentally kmalloced_param list. */
static DEFINE_MUTEX(param_lock);
@@ -105,7 +99,7 @@ static int parse_one(char *param,
/* No one handled NULL, so do it here. */
if (!val && params[i].ops->set != param_set_bool)
return -EINVAL;
- DEBUGP("They are equal! Calling %p\n",
+ pr_debug("They are equal! Calling %p\n",
params[i].ops->set);
mutex_lock(&param_lock);
err = params[i].ops->set(val, &params[i]);
@@ -115,11 +109,11 @@ static int parse_one(char *param,
}
if (handle_unknown) {
- DEBUGP("Unknown argument: calling %p\n", handle_unknown);
+ pr_debug("Unknown argument: calling %p\n", handle_unknown);
return handle_unknown(param, val);
}
- DEBUGP("Unknown argument `%s'\n", param);
+ pr_debug("Unknown argument `%s'\n", param);
return -ENOENT;
}
@@ -184,7 +178,7 @@ int parse_args(const char *name,
{
char *param, *val;
- DEBUGP("Parsing ARGS: %s\n", args);
+ pr_debug("Parsing ARGS: %s\n", args);
/* Chew leading spaces */
args = skip_spaces(args);
@@ -369,6 +363,30 @@ struct kernel_param_ops param_ops_invbool = {
};
EXPORT_SYMBOL(param_ops_invbool);
+int param_set_bint(const char *val, const struct kernel_param *kp)
+{
+ struct kernel_param boolkp;
+ bool v;
+ int ret;
+
+ /* Match bool exactly, by re-using it. */
+ boolkp = *kp;
+ boolkp.arg = &v;
+ boolkp.flags |= KPARAM_ISBOOL;
+
+ ret = param_set_bool(val, &boolkp);
+ if (ret == 0)
+ *(int *)kp->arg = v;
+ return ret;
+}
+EXPORT_SYMBOL(param_set_bint);
+
+struct kernel_param_ops param_ops_bint = {
+ .set = param_set_bint,
+ .get = param_get_int,
+};
+EXPORT_SYMBOL(param_ops_bint);
+
/* We break the rule and mangle the string. */
static int param_array(const char *name,
const char *val,
diff --git a/kernel/pid.c b/kernel/pid.c
index fa5f72227e5..ce8e00deacc 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -137,7 +137,9 @@ static int pid_before(int base, int a, int b)
}
/*
- * We might be racing with someone else trying to set pid_ns->last_pid.
+ * We might be racing with someone else trying to set pid_ns->last_pid
+ * at the pid allocation time (there's also a sysctl for this, but racing
+ * with this one is OK, see comment in kernel/pid_namespace.c about it).
* We want the winner to have the "later" value, because if the
* "earlier" value prevails, then a pid may get reused immediately.
*
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index e9c9adc84ca..a8968396046 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -191,9 +191,40 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
return;
}
+static int pid_ns_ctl_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /*
+ * Writing directly to ns' last_pid field is OK, since this field
+ * is volatile in a living namespace anyway and a code writing to
+ * it should synchronize its usage with external means.
+ */
+
+ tmp.data = &current->nsproxy->pid_ns->last_pid;
+ return proc_dointvec(&tmp, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table pid_ns_ctl_table[] = {
+ {
+ .procname = "ns_last_pid",
+ .maxlen = sizeof(int),
+ .mode = 0666, /* permissions are checked in the handler */
+ .proc_handler = pid_ns_ctl_handler,
+ },
+ { }
+};
+
+static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+ register_sysctl_paths(kern_path, pid_ns_ctl_table);
return 0;
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 3739ecced08..8742fd013a9 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -773,8 +773,7 @@ static int enough_swap(unsigned int nr_pages, unsigned int flags)
pr_debug("PM: Free swap pages: %u\n", free_swap);
- required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ?
- nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1);
+ required = PAGES_FOR_IO + nr_pages;
return free_swap > required;
}
@@ -802,10 +801,12 @@ int swsusp_write(unsigned int flags)
printk(KERN_ERR "PM: Cannot get swap writer\n");
return error;
}
- if (!enough_swap(pages, flags)) {
- printk(KERN_ERR "PM: Not enough free swap\n");
- error = -ENOSPC;
- goto out_finish;
+ if (flags & SF_NOCOMPRESS_MODE) {
+ if (!enough_swap(pages, flags)) {
+ printk(KERN_ERR "PM: Not enough free swap\n");
+ error = -ENOSPC;
+ goto out_finish;
+ }
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_read_next(&snapshot);
diff --git a/kernel/printk.c b/kernel/printk.c
index 989e4a52da7..13c0a1143f4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -521,7 +521,7 @@ static void __call_console_drivers(unsigned start, unsigned end)
}
}
-static int __read_mostly ignore_loglevel;
+static bool __read_mostly ignore_loglevel;
static int __init ignore_loglevel_setup(char *str)
{
@@ -532,7 +532,7 @@ static int __init ignore_loglevel_setup(char *str)
}
early_param("ignore_loglevel", ignore_loglevel_setup);
-module_param_named(ignore_loglevel, ignore_loglevel, bool, S_IRUGO | S_IWUSR);
+module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
"print all kernel messages to the console.");
@@ -696,9 +696,9 @@ static void zap_locks(void)
}
#if defined(CONFIG_PRINTK_TIME)
-static int printk_time = 1;
+static bool printk_time = 1;
#else
-static int printk_time = 0;
+static bool printk_time = 0;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
@@ -1098,7 +1098,7 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
return -1;
}
-int console_suspend_enabled = 1;
+bool console_suspend_enabled = 1;
EXPORT_SYMBOL(console_suspend_enabled);
static int __init console_suspend_disable(char *str)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 78ab24a7b0e..00ab2ca5ed1 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -172,6 +172,14 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)
return ret;
}
+static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+{
+ if (mode & PTRACE_MODE_NOAUDIT)
+ return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
+ else
+ return has_ns_capability(current, ns, CAP_SYS_PTRACE);
+}
+
int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
@@ -198,7 +206,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
cred->gid == tcred->sgid &&
cred->gid == tcred->gid))
goto ok;
- if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
+ if (ptrace_has_cap(tcred->user->user_ns, mode))
goto ok;
rcu_read_unlock();
return -EPERM;
@@ -207,7 +215,7 @@ ok:
smp_rmb();
if (task->mm)
dumpable = get_dumpable(task->mm);
- if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
+ if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode))
return -EPERM;
return security_ptrace_access_check(task, mode);
@@ -277,7 +285,7 @@ static int ptrace_attach(struct task_struct *task, long request,
task->ptrace = PT_PTRACED;
if (seize)
task->ptrace |= PT_SEIZED;
- if (task_ns_capable(task, CAP_SYS_PTRACE))
+ if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
task->ptrace |= PT_PTRACE_CAP;
__ptrace_link(task, current);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cecbb64be05..df00cb09263 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4330,7 +4330,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
goto out_free_cpus_allowed;
}
retval = -EPERM;
- if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
+ if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p);
@@ -7134,10 +7134,6 @@ void set_curr_task(int cpu, struct task_struct *p)
#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-#else /* !CONFIG_RT_GROUP_SCHED */
-#endif /* CONFIG_RT_GROUP_SCHED */
-
#ifdef CONFIG_CGROUP_SCHED
/* task_group_lock serializes the addition/removal of task groups */
static DEFINE_SPINLOCK(task_group_lock);
@@ -7246,9 +7242,6 @@ void sched_move_task(struct task_struct *tsk)
}
#endif /* CONFIG_CGROUP_SCHED */
-#ifdef CONFIG_FAIR_GROUP_SCHED
-#endif
-
#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
static unsigned long to_ratio(u64 period, u64 runtime)
{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8e42de9105f..84adb2d66cb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3130,8 +3130,10 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
}
#define LBF_ALL_PINNED 0x01
-#define LBF_NEED_BREAK 0x02
-#define LBF_ABORT 0x04
+#define LBF_NEED_BREAK 0x02 /* clears into HAD_BREAK */
+#define LBF_HAD_BREAK 0x04
+#define LBF_HAD_BREAKS 0x0C /* count HAD_BREAKs overflows into ABORT */
+#define LBF_ABORT 0x10
/*
* can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
@@ -4508,7 +4510,9 @@ redo:
goto out_balanced;
if (lb_flags & LBF_NEED_BREAK) {
- lb_flags &= ~LBF_NEED_BREAK;
+ lb_flags += LBF_HAD_BREAK - LBF_NEED_BREAK;
+ if (lb_flags & LBF_ABORT)
+ goto out_balanced;
goto redo;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155bf3f..40701538fbd 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1692,6 +1692,124 @@ SYSCALL_DEFINE1(umask, int, mask)
return mask;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int prctl_set_mm(int opt, unsigned long addr,
+ unsigned long arg4, unsigned long arg5)
+{
+ unsigned long rlim = rlimit(RLIMIT_DATA);
+ unsigned long vm_req_flags;
+ unsigned long vm_bad_flags;
+ struct vm_area_struct *vma;
+ int error = 0;
+ struct mm_struct *mm = current->mm;
+
+ if (arg4 | arg5)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (addr >= TASK_SIZE)
+ return -EINVAL;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, addr);
+
+ if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
+ /* It must be existing VMA */
+ if (!vma || vma->vm_start > addr)
+ goto out;
+ }
+
+ error = -EINVAL;
+ switch (opt) {
+ case PR_SET_MM_START_CODE:
+ case PR_SET_MM_END_CODE:
+ vm_req_flags = VM_READ | VM_EXEC;
+ vm_bad_flags = VM_WRITE | VM_MAYSHARE;
+
+ if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+ (vma->vm_flags & vm_bad_flags))
+ goto out;
+
+ if (opt == PR_SET_MM_START_CODE)
+ mm->start_code = addr;
+ else
+ mm->end_code = addr;
+ break;
+
+ case PR_SET_MM_START_DATA:
+ case PR_SET_MM_END_DATA:
+ vm_req_flags = VM_READ | VM_WRITE;
+ vm_bad_flags = VM_EXEC | VM_MAYSHARE;
+
+ if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+ (vma->vm_flags & vm_bad_flags))
+ goto out;
+
+ if (opt == PR_SET_MM_START_DATA)
+ mm->start_data = addr;
+ else
+ mm->end_data = addr;
+ break;
+
+ case PR_SET_MM_START_STACK:
+
+#ifdef CONFIG_STACK_GROWSUP
+ vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
+#else
+ vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
+#endif
+ if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
+ goto out;
+
+ mm->start_stack = addr;
+ break;
+
+ case PR_SET_MM_START_BRK:
+ if (addr <= mm->end_data)
+ goto out;
+
+ if (rlim < RLIM_INFINITY &&
+ (mm->brk - addr) +
+ (mm->end_data - mm->start_data) > rlim)
+ goto out;
+
+ mm->start_brk = addr;
+ break;
+
+ case PR_SET_MM_BRK:
+ if (addr <= mm->end_data)
+ goto out;
+
+ if (rlim < RLIM_INFINITY &&
+ (addr - mm->start_brk) +
+ (mm->end_data - mm->start_data) > rlim)
+ goto out;
+
+ mm->brk = addr;
+ break;
+
+ default:
+ error = -EINVAL;
+ goto out;
+ }
+
+ error = 0;
+
+out:
+ up_read(&mm->mmap_sem);
+
+ return error;
+}
+#else /* CONFIG_CHECKPOINT_RESTORE */
+static int prctl_set_mm(int opt, unsigned long addr,
+ unsigned long arg4, unsigned long arg5)
+{
+ return -EINVAL;
+}
+#endif
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -1841,6 +1959,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
error = PR_MCE_KILL_DEFAULT;
break;
+ case PR_SET_MM:
+ error = prctl_set_mm(arg2, arg3, arg4, arg5);
+ break;
default:
error = -EINVAL;
break;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b1e8943fed1..683d559a0ee 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,11 +22,13 @@
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
+#include <linux/bsearch.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/ctype.h>
+#include <linux/sort.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
@@ -947,13 +949,6 @@ struct ftrace_func_probe {
struct rcu_head rcu;
};
-enum {
- FTRACE_ENABLE_CALLS = (1 << 0),
- FTRACE_DISABLE_CALLS = (1 << 1),
- FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
- FTRACE_START_FUNC_RET = (1 << 3),
- FTRACE_STOP_FUNC_RET = (1 << 4),
-};
struct ftrace_func_entry {
struct hlist_node hlist;
unsigned long ip;
@@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = {
.filter_hash = EMPTY_HASH,
};
-static struct dyn_ftrace *ftrace_new_addrs;
-
static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
struct ftrace_page *next;
+ struct dyn_ftrace *records;
int index;
- struct dyn_ftrace records[];
+ int size;
};
-#define ENTRIES_PER_PAGE \
- ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+static struct ftrace_page *ftrace_new_pgs;
+
+#define ENTRY_SIZE sizeof(struct dyn_ftrace)
+#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
/* estimate from running different kernels */
#define NR_TO_INIT 10000
@@ -1003,7 +999,10 @@ struct ftrace_page {
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
-static struct dyn_ftrace *ftrace_free_records;
+static bool ftrace_hash_empty(struct ftrace_hash *hash)
+{
+ return !hash || !hash->count;
+}
static struct ftrace_func_entry *
ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
struct hlist_head *hhd;
struct hlist_node *n;
- if (!hash->count)
+ if (ftrace_hash_empty(hash))
return NULL;
if (hash->size_bits > 0)
@@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
return NULL;
/* Empty hash? */
- if (!hash || !hash->count)
+ if (ftrace_hash_empty(hash))
return new_hash;
size = 1 << hash->size_bits;
@@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
filter_hash = rcu_dereference_raw(ops->filter_hash);
notrace_hash = rcu_dereference_raw(ops->notrace_hash);
- if ((!filter_hash || !filter_hash->count ||
+ if ((ftrace_hash_empty(filter_hash) ||
ftrace_lookup_ip(filter_hash, ip)) &&
- (!notrace_hash || !notrace_hash->count ||
+ (ftrace_hash_empty(notrace_hash) ||
!ftrace_lookup_ip(notrace_hash, ip)))
ret = 1;
else
@@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
} \
}
+
+static int ftrace_cmp_recs(const void *a, const void *b)
+{
+ const struct dyn_ftrace *reca = a;
+ const struct dyn_ftrace *recb = b;
+
+ if (reca->ip > recb->ip)
+ return 1;
+ if (reca->ip < recb->ip)
+ return -1;
+ return 0;
+}
+
+/**
+ * ftrace_location - return true if the ip giving is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_location(unsigned long ip)
+{
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ struct dyn_ftrace key;
+
+ key.ip = ip;
+
+ for (pg = ftrace_pages_start; pg; pg = pg->next) {
+ rec = bsearch(&key, pg->records, pg->index,
+ sizeof(struct dyn_ftrace),
+ ftrace_cmp_recs);
+ if (rec)
+ return 1;
+ }
+
+ return 0;
+}
+
static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (filter_hash) {
hash = ops->filter_hash;
other_hash = ops->notrace_hash;
- if (!hash || !hash->count)
+ if (ftrace_hash_empty(hash))
all = 1;
} else {
inc = !inc;
@@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
* If the notrace hash has no items,
* then there's nothing to do.
*/
- if (hash && !hash->count)
+ if (ftrace_hash_empty(hash))
return;
}
@@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
match = 1;
} else {
- in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
- in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
+ in_hash = !!ftrace_lookup_ip(hash, rec->ip);
+ in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
/*
*
@@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
if (filter_hash && in_hash && !in_other_hash)
match = 1;
else if (!filter_hash && in_hash &&
- (in_other_hash || !other_hash->count))
+ (in_other_hash || ftrace_hash_empty(other_hash)))
match = 1;
}
if (!match)
@@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
__ftrace_hash_rec_update(ops, filter_hash, 1);
}
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
- rec->freelist = ftrace_free_records;
- ftrace_free_records = rec;
- rec->flags |= FTRACE_FL_FREE;
-}
-
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
- struct dyn_ftrace *rec;
-
- /* First check for freed records */
- if (ftrace_free_records) {
- rec = ftrace_free_records;
-
- if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
- FTRACE_WARN_ON_ONCE(1);
- ftrace_free_records = NULL;
+ if (ftrace_pages->index == ftrace_pages->size) {
+ /* We should have allocated enough */
+ if (WARN_ON(!ftrace_pages->next))
return NULL;
- }
-
- ftrace_free_records = rec->freelist;
- memset(rec, 0, sizeof(*rec));
- return rec;
- }
-
- if (ftrace_pages->index == ENTRIES_PER_PAGE) {
- if (!ftrace_pages->next) {
- /* allocate another page */
- ftrace_pages->next =
- (void *)get_zeroed_page(GFP_KERNEL);
- if (!ftrace_pages->next)
- return NULL;
- }
ftrace_pages = ftrace_pages->next;
}
@@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
return NULL;
rec->ip = ip;
- rec->newlist = ftrace_new_addrs;
- ftrace_new_addrs = rec;
return rec;
}
@@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writting to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
{
switch (failed) {
case -EFAULT:
@@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
return 0;
}
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
{
- unsigned long ftrace_addr;
unsigned long flag = 0UL;
- ftrace_addr = (unsigned long)FTRACE_ADDR;
-
/*
- * If we are enabling tracing:
+ * If we are updating calls:
*
* If the record has a ref count, then we need to enable it
* because someone is using it.
*
* Otherwise we make sure its disabled.
*
- * If we are disabling tracing, then disable all records that
+ * If we are disabling calls, then disable all records that
* are enabled.
*/
if (enable && (rec->flags & ~FTRACE_FL_MASK))
@@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
/* If the state of this record hasn't changed, then do nothing */
if ((rec->flags & FTRACE_FL_ENABLED) == flag)
- return 0;
+ return FTRACE_UPDATE_IGNORE;
if (flag) {
- rec->flags |= FTRACE_FL_ENABLED;
+ if (update)
+ rec->flags |= FTRACE_FL_ENABLED;
+ return FTRACE_UPDATE_MAKE_CALL;
+ }
+
+ if (update)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+
+ return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record, set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+ return ftrace_check_record(rec, enable, 1);
+}
+
+/**
+ * ftrace_test_record, check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+ return ftrace_check_record(rec, enable, 0);
+}
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ ret = ftrace_update_record(rec, enable);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
return ftrace_make_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
}
- rec->flags &= ~FTRACE_FL_ENABLED;
- return ftrace_make_nop(NULL, rec, ftrace_addr);
+ return -1; /* unknow ftrace bug */
}
-static void ftrace_replace_code(int enable)
+static void ftrace_replace_code(int update)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
@@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable)
return;
do_for_each_ftrace_rec(pg, rec) {
- /* Skip over free records */
- if (rec->flags & FTRACE_FL_FREE)
- continue;
-
- failed = __ftrace_replace_code(rec, enable);
+ failed = __ftrace_replace_code(rec, update);
if (failed) {
ftrace_bug(failed, rec->ip);
/* Stop processing */
@@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable)
} while_for_each_ftrace_rec();
}
+struct ftrace_rec_iter {
+ struct ftrace_page *pg;
+ int index;
+};
+
+/**
+ * ftrace_rec_iter_start, start up iterating over traced functions
+ *
+ * Returns an iterator handle that is used to iterate over all
+ * the records that represent address locations where functions
+ * are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+ /*
+ * We only use a single iterator.
+ * Protected by the ftrace_lock mutex.
+ */
+ static struct ftrace_rec_iter ftrace_rec_iter;
+ struct ftrace_rec_iter *iter = &ftrace_rec_iter;
+
+ iter->pg = ftrace_pages_start;
+ iter->index = 0;
+
+ /* Could have empty pages */
+ while (iter->pg && !iter->pg->index)
+ iter->pg = iter->pg->next;
+
+ if (!iter->pg)
+ return NULL;
+
+ return iter;
+}
+
+/**
+ * ftrace_rec_iter_next, get the next record to process.
+ * @iter: The handle to the iterator.
+ *
+ * Returns the next iterator after the given iterator @iter.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+ iter->index++;
+
+ if (iter->index >= iter->pg->index) {
+ iter->pg = iter->pg->next;
+ iter->index = 0;
+
+ /* Could have empty pages */
+ while (iter->pg && !iter->pg->index)
+ iter->pg = iter->pg->next;
+ }
+
+ if (!iter->pg)
+ return NULL;
+
+ return iter;
+}
+
+/**
+ * ftrace_rec_iter_record, get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns the record that the current @iter is at.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+ return &iter->pg->records[iter->index];
+}
+
static int
ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
@@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
- /*
- * Do not call function tracer while we update the code.
- * We are in stop machine, no worrying about races.
- */
- function_trace_stop++;
-
- if (*command & FTRACE_ENABLE_CALLS)
+ if (*command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
else if (*command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
@@ -1636,21 +1769,33 @@ static int __ftrace_modify_code(void *data)
else if (*command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
- /*
- * For archs that call ftrace_test_stop_func(), we must
- * wait till after we update all the function callers
- * before we update the callback. This keeps different
- * ops that record different functions from corrupting
- * each other.
- */
- __ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
- function_trace_stop--;
-
return 0;
}
+/**
+ * ftrace_run_stop_machine, go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method, the
+ * it can call this function.
+ */
+void ftrace_run_stop_machine(int command)
+{
+ stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code, modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if it does not need to
+ * run stop_machine() to modify code.
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+ ftrace_run_stop_machine(command);
+}
+
static void ftrace_run_update_code(int command)
{
int ret;
@@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command)
FTRACE_WARN_ON(ret);
if (ret)
return;
+ /*
+ * Do not call function tracer while we update the code.
+ * We are in stop machine.
+ */
+ function_trace_stop++;
- stop_machine(__ftrace_modify_code, &command, NULL);
+ /*
+ * By default we use stop_machine() to modify the code.
+ * But archs can do what ever they want as long as it
+ * is safe. The stop_machine() is the safest, but also
+ * produces the most overhead.
+ */
+ arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ /*
+ * For archs that call ftrace_test_stop_func(), we must
+ * wait till after we update all the function callers
+ * before we update the callback. This keeps different
+ * ops that record different functions from corrupting
+ * each other.
+ */
+ __ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+ function_trace_stop--;
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
@@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return -ENODEV;
ftrace_start_up++;
- command |= FTRACE_ENABLE_CALLS;
+ command |= FTRACE_UPDATE_CALLS;
/* ops marked global share the filter hashes */
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
@@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
if (ops != &global_ops || !global_start_up)
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
- if (!ftrace_start_up)
- command |= FTRACE_DISABLE_CALLS;
+ command |= FTRACE_UPDATE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
@@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void)
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
if (ftrace_start_up)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
}
static void ftrace_shutdown_sysctl(void)
@@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops)
struct ftrace_hash *hash;
hash = ops->filter_hash;
- return !!(!hash || !hash->count);
+ return ftrace_hash_empty(hash);
}
static int ftrace_update_code(struct module *mod)
{
+ struct ftrace_page *pg;
struct dyn_ftrace *p;
cycle_t start, stop;
unsigned long ref = 0;
+ int i;
/*
* When adding a module, we need to check if tracers are
@@ -1817,46 +1986,44 @@ static int ftrace_update_code(struct module *mod)
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
- while (ftrace_new_addrs) {
+ for (pg = ftrace_new_pgs; pg; pg = pg->next) {
- /* If something went wrong, bail without enabling anything */
- if (unlikely(ftrace_disabled))
- return -1;
+ for (i = 0; i < pg->index; i++) {
+ /* If something went wrong, bail without enabling anything */
+ if (unlikely(ftrace_disabled))
+ return -1;
- p = ftrace_new_addrs;
- ftrace_new_addrs = p->newlist;
- p->flags = ref;
+ p = &pg->records[i];
+ p->flags = ref;
- /*
- * Do the initial record conversion from mcount jump
- * to the NOP instructions.
- */
- if (!ftrace_code_disable(mod, p)) {
- ftrace_free_rec(p);
- /* Game over */
- break;
- }
+ /*
+ * Do the initial record conversion from mcount jump
+ * to the NOP instructions.
+ */
+ if (!ftrace_code_disable(mod, p))
+ break;
- ftrace_update_cnt++;
+ ftrace_update_cnt++;
- /*
- * If the tracing is enabled, go ahead and enable the record.
- *
- * The reason not to enable the record immediatelly is the
- * inherent check of ftrace_make_nop/ftrace_make_call for
- * correct previous instructions. Making first the NOP
- * conversion puts the module to the correct state, thus
- * passing the ftrace_make_call check.
- */
- if (ftrace_start_up && ref) {
- int failed = __ftrace_replace_code(p, 1);
- if (failed) {
- ftrace_bug(failed, p->ip);
- ftrace_free_rec(p);
+ /*
+ * If the tracing is enabled, go ahead and enable the record.
+ *
+ * The reason not to enable the record immediatelly is the
+ * inherent check of ftrace_make_nop/ftrace_make_call for
+ * correct previous instructions. Making first the NOP
+ * conversion puts the module to the correct state, thus
+ * passing the ftrace_make_call check.
+ */
+ if (ftrace_start_up && ref) {
+ int failed = __ftrace_replace_code(p, 1);
+ if (failed)
+ ftrace_bug(failed, p->ip);
}
}
}
+ ftrace_new_pgs = NULL;
+
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod)
return 0;
}
-static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+static int ftrace_allocate_records(struct ftrace_page *pg, int count)
{
- struct ftrace_page *pg;
+ int order;
int cnt;
- int i;
- /* allocate a few pages */
- ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
- if (!ftrace_pages_start)
- return -1;
+ if (WARN_ON(!count))
+ return -EINVAL;
+
+ order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
/*
- * Allocate a few more pages.
- *
- * TODO: have some parser search vmlinux before
- * final linking to find all calls to ftrace.
- * Then we can:
- * a) know how many pages to allocate.
- * and/or
- * b) set up the table then.
- *
- * The dynamic code is still necessary for
- * modules.
+ * We want to fill as much as possible. No more than a page
+ * may be empty.
*/
+ while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
+ order--;
- pg = ftrace_pages = ftrace_pages_start;
+ again:
+ pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
- cnt = num_to_init / ENTRIES_PER_PAGE;
- pr_info("ftrace: allocating %ld entries in %d pages\n",
- num_to_init, cnt + 1);
+ if (!pg->records) {
+ /* if we can't allocate this size, try something smaller */
+ if (!order)
+ return -ENOMEM;
+ order >>= 1;
+ goto again;
+ }
- for (i = 0; i < cnt; i++) {
- pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+ cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
+ pg->size = cnt;
- /* If we fail, we'll try later anyway */
- if (!pg->next)
+ if (cnt > count)
+ cnt = count;
+
+ return cnt;
+}
+
+static struct ftrace_page *
+ftrace_allocate_pages(unsigned long num_to_init)
+{
+ struct ftrace_page *start_pg;
+ struct ftrace_page *pg;
+ int order;
+ int cnt;
+
+ if (!num_to_init)
+ return 0;
+
+ start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
+ if (!pg)
+ return NULL;
+
+ /*
+ * Try to allocate as much as possible in one continues
+ * location that fills in all of the space. We want to
+ * waste as little space as possible.
+ */
+ for (;;) {
+ cnt = ftrace_allocate_records(pg, num_to_init);
+ if (cnt < 0)
+ goto free_pages;
+
+ num_to_init -= cnt;
+ if (!num_to_init)
break;
+ pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
+ if (!pg->next)
+ goto free_pages;
+
pg = pg->next;
}
- return 0;
+ return start_pg;
+
+ free_pages:
+ while (start_pg) {
+ order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+ free_pages((unsigned long)pg->records, order);
+ start_pg = pg->next;
+ kfree(pg);
+ pg = start_pg;
+ }
+ pr_info("ftrace: FAILED to allocate memory for functions\n");
+ return NULL;
}
-enum {
- FTRACE_ITER_FILTER = (1 << 0),
- FTRACE_ITER_NOTRACE = (1 << 1),
- FTRACE_ITER_PRINTALL = (1 << 2),
- FTRACE_ITER_HASH = (1 << 3),
- FTRACE_ITER_ENABLED = (1 << 4),
-};
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
+{
+ int cnt;
+
+ if (!num_to_init) {
+ pr_info("ftrace: No functions to be traced?\n");
+ return -1;
+ }
+
+ cnt = num_to_init / ENTRIES_PER_PAGE;
+ pr_info("ftrace: allocating %ld entries in %d pages\n",
+ num_to_init, cnt + 1);
+
+ return 0;
+}
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
@@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l;
+ if (!(iter->flags & FTRACE_ITER_DO_HASH))
+ return NULL;
+
if (iter->func_pos > *pos)
return NULL;
@@ -2023,7 +2244,7 @@ static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
- struct ftrace_ops *ops = &global_ops;
+ struct ftrace_ops *ops = iter->ops;
struct dyn_ftrace *rec = NULL;
if (unlikely(ftrace_disabled))
@@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
}
} else {
rec = &iter->pg->records[iter->idx++];
- if ((rec->flags & FTRACE_FL_FREE) ||
-
- ((iter->flags & FTRACE_ITER_FILTER) &&
+ if (((iter->flags & FTRACE_ITER_FILTER) &&
!(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
static void *t_start(struct seq_file *m, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
- struct ftrace_ops *ops = &global_ops;
+ struct ftrace_ops *ops = iter->ops;
void *p = NULL;
loff_t l;
@@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
* off, we can short cut and just print out that all
* functions are enabled.
*/
- if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) {
+ if (iter->flags & FTRACE_ITER_FILTER &&
+ ftrace_hash_empty(ops->filter_hash)) {
if (*pos > 0)
return t_hash_start(m, pos);
iter->flags |= FTRACE_ITER_PRINTALL;
@@ -2126,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
break;
}
- if (!p) {
- if (iter->flags & FTRACE_ITER_FILTER)
- return t_hash_start(m, pos);
-
- return NULL;
- }
+ if (!p)
+ return t_hash_start(m, pos);
return iter;
}
@@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
+ iter->ops = &global_ops;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
iter->pg = ftrace_pages_start;
iter->flags = FTRACE_ITER_ENABLED;
+ iter->ops = &global_ops;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
mutex_unlock(&ftrace_lock);
}
-static int
+/**
+ * ftrace_regex_open - initialize function tracer filter files
+ * @ops: The ftrace_ops that hold the hash filters
+ * @flag: The type of filter to process
+ * @inode: The inode, usually passed in to your open routine
+ * @file: The file, usually passed in to your open routine
+ *
+ * ftrace_regex_open() initializes the filter files for the
+ * @ops. Depending on @flag it may process the filter hash or
+ * the notrace hash of @ops. With this called from the open
+ * routine, you can use ftrace_filter_write() for the write
+ * routine if @flag has FTRACE_ITER_FILTER set, or
+ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
+ * ftrace_regex_lseek() should be used as the lseek routine, and
+ * release must call ftrace_regex_release().
+ */
+int
ftrace_regex_open(struct ftrace_ops *ops, int flag,
struct inode *inode, struct file *file)
{
@@ -2306,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
static int
ftrace_filter_open(struct inode *inode, struct file *file)
{
- return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
- inode, file);
+ return ftrace_regex_open(&global_ops,
+ FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+ inode, file);
}
static int
@@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
inode, file);
}
-static loff_t
+loff_t
ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
{
loff_t ret;
@@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff,
goto out_unlock;
do_for_each_ftrace_rec(pg, rec) {
-
if (ftrace_match_record(rec, mod, search, search_len, type)) {
ret = enter_record(hash, rec, not);
if (ret < 0) {
@@ -2871,14 +3105,14 @@ out_unlock:
return ret;
}
-static ssize_t
+ssize_t
ftrace_filter_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
}
-static ssize_t
+ssize_t
ftrace_notrace_write(struct file *file, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
@@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
&& ftrace_enabled)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
mutex_unlock(&ftrace_lock);
@@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-static void __init
-set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
{
char *func;
@@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
static void __init set_ftrace_early_filters(void)
{
if (ftrace_filter_buf[0])
- set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
+ ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
if (ftrace_notrace_buf[0])
- set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
+ ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (ftrace_graph_buf[0])
set_ftrace_early_graph(ftrace_graph_buf);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
}
-static int
-ftrace_regex_release(struct inode *inode, struct file *file)
+int ftrace_regex_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
struct ftrace_iterator *iter;
@@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash, iter->hash);
if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
&& ftrace_enabled)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+ ftrace_run_update_code(FTRACE_UPDATE_CALLS);
mutex_unlock(&ftrace_lock);
}
@@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
do_for_each_ftrace_rec(pg, rec) {
- if (rec->flags & FTRACE_FL_FREE)
- continue;
-
if (ftrace_match_record(rec, NULL, search, search_len, type)) {
/* if it is in the array */
exists = false;
@@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
return 0;
}
+static void ftrace_swap_recs(void *a, void *b, int size)
+{
+ struct dyn_ftrace *reca = a;
+ struct dyn_ftrace *recb = b;
+ struct dyn_ftrace t;
+
+ t = *reca;
+ *reca = *recb;
+ *recb = t;
+}
+
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
{
+ struct ftrace_page *pg;
+ unsigned long count;
unsigned long *p;
unsigned long addr;
unsigned long flags = 0; /* Shut up gcc */
+ int ret = -ENOMEM;
+
+ count = end - start;
+
+ if (!count)
+ return 0;
+
+ pg = ftrace_allocate_pages(count);
+ if (!pg)
+ return -ENOMEM;
mutex_lock(&ftrace_lock);
+
+ /*
+ * Core and each module needs their own pages, as
+ * modules will free them when they are removed.
+ * Force a new page to be allocated for modules.
+ */
+ if (!mod) {
+ WARN_ON(ftrace_pages || ftrace_pages_start);
+ /* First initialization */
+ ftrace_pages = ftrace_pages_start = pg;
+ } else {
+ if (!ftrace_pages)
+ goto out;
+
+ if (WARN_ON(ftrace_pages->next)) {
+ /* Hmm, we have free pages? */
+ while (ftrace_pages->next)
+ ftrace_pages = ftrace_pages->next;
+ }
+
+ ftrace_pages->next = pg;
+ ftrace_pages = pg;
+ }
+
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
@@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod,
*/
if (!addr)
continue;
- ftrace_record_ip(addr);
+ if (!ftrace_record_ip(addr))
+ break;
}
+ /* These new locations need to be initialized */
+ ftrace_new_pgs = pg;
+
+ /* Make each individual set of pages sorted by ips */
+ for (; pg; pg = pg->next)
+ sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
+ ftrace_cmp_recs, ftrace_swap_recs);
+
/*
* We only need to disable interrupts on start up
* because we are modifying code that an interrupt
@@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod,
ftrace_update_code(mod);
if (!mod)
local_irq_restore(flags);
+ ret = 0;
+ out:
mutex_unlock(&ftrace_lock);
- return 0;
+ return ret;
}
#ifdef CONFIG_MODULES
+
+#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
+
void ftrace_release_mod(struct module *mod)
{
struct dyn_ftrace *rec;
+ struct ftrace_page **last_pg;
struct ftrace_page *pg;
+ int order;
mutex_lock(&ftrace_lock);
if (ftrace_disabled)
goto out_unlock;
- do_for_each_ftrace_rec(pg, rec) {
+ /*
+ * Each module has its own ftrace_pages, remove
+ * them from the list.
+ */
+ last_pg = &ftrace_pages_start;
+ for (pg = ftrace_pages_start; pg; pg = *last_pg) {
+ rec = &pg->records[0];
if (within_module_core(rec->ip, mod)) {
/*
- * rec->ip is changed in ftrace_free_rec()
- * It should not between s and e if record was freed.
+ * As core pages are first, the first
+ * page should never be a module page.
*/
- FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
- ftrace_free_rec(rec);
- }
- } while_for_each_ftrace_rec();
+ if (WARN_ON(pg == ftrace_pages_start))
+ goto out_unlock;
+
+ /* Check if we are deleting the last page */
+ if (pg == ftrace_pages)
+ ftrace_pages = next_to_ftrace_page(last_pg);
+
+ *last_pg = pg->next;
+ order = get_count_order(pg->size / ENTRIES_PER_PAGE);
+ free_pages((unsigned long)pg->records, order);
+ kfree(pg);
+ } else
+ last_pg = &pg->next;
+ }
out_unlock:
mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f04cc3136bd..24aee712745 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
return -ENOMEM;
}
+static int create_filter_start(char *filter_str, bool set_str,
+ struct filter_parse_state **psp,
+ struct event_filter **filterp)
+{
+ struct event_filter *filter;
+ struct filter_parse_state *ps = NULL;
+ int err = 0;
+
+ WARN_ON_ONCE(*psp || *filterp);
+
+ /* allocate everything, and if any fails, free all and fail */
+ filter = __alloc_filter();
+ if (filter && set_str)
+ err = replace_filter_string(filter, filter_str);
+
+ ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+
+ if (!filter || !ps || err) {
+ kfree(ps);
+ __free_filter(filter);
+ return -ENOMEM;
+ }
+
+ /* we're committed to creating a new filter */
+ *filterp = filter;
+ *psp = ps;
+
+ parse_init(ps, filter_ops, filter_str);
+ err = filter_parse(ps);
+ if (err && set_str)
+ append_filter_err(ps, filter);
+ return err;
+}
+
+static void create_filter_finish(struct filter_parse_state *ps)
+{
+ if (ps) {
+ filter_opstack_clear(ps);
+ postfix_clear(ps);
+ kfree(ps);
+ }
+}
+
+/**
+ * create_filter - create a filter for a ftrace_event_call
+ * @call: ftrace_event_call to create a filter for
+ * @filter_str: filter string
+ * @set_str: remember @filter_str and enable detailed error in filter
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Creates a filter for @call with @filter_str. If @set_str is %true,
+ * @filter_str is copied and recorded in the new filter.
+ *
+ * On success, returns 0 and *@filterp points to the new filter. On
+ * failure, returns -errno and *@filterp may point to %NULL or to a new
+ * filter. In the latter case, the returned filter contains error
+ * information if @set_str is %true and the caller is responsible for
+ * freeing it.
+ */
+static int create_filter(struct ftrace_event_call *call,
+ char *filter_str, bool set_str,
+ struct event_filter **filterp)
+{
+ struct event_filter *filter = NULL;
+ struct filter_parse_state *ps = NULL;
+ int err;
+
+ err = create_filter_start(filter_str, set_str, &ps, &filter);
+ if (!err) {
+ err = replace_preds(call, filter, ps, filter_str, false);
+ if (err && set_str)
+ append_filter_err(ps, filter);
+ }
+ create_filter_finish(ps);
+
+ *filterp = filter;
+ return err;
+}
+
+/**
+ * create_system_filter - create a filter for an event_subsystem
+ * @system: event_subsystem to create a filter for
+ * @filter_str: filter string
+ * @filterp: out param for created filter (always updated on return)
+ *
+ * Identical to create_filter() except that it creates a subsystem filter
+ * and always remembers @filter_str.
+ */
+static int create_system_filter(struct event_subsystem *system,
+ char *filter_str, struct event_filter **filterp)
+{
+ struct event_filter *filter = NULL;
+ struct filter_parse_state *ps = NULL;
+ int err;
+
+ err = create_filter_start(filter_str, true, &ps, &filter);
+ if (!err) {
+ err = replace_system_preds(system, ps, filter_str);
+ if (!err) {
+ /* System filters just show a default message */
+ kfree(filter->filter_string);
+ filter->filter_string = NULL;
+ } else {
+ append_filter_err(ps, filter);
+ }
+ }
+ create_filter_finish(ps);
+
+ *filterp = filter;
+ return err;
+}
+
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
- struct filter_parse_state *ps;
struct event_filter *filter;
- struct event_filter *tmp;
int err = 0;
mutex_lock(&event_mutex);
@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
goto out_unlock;
}
- err = -ENOMEM;
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto out_unlock;
-
- filter = __alloc_filter();
- if (!filter) {
- kfree(ps);
- goto out_unlock;
- }
-
- replace_filter_string(filter, filter_string);
-
- parse_init(ps, filter_ops, filter_string);
- err = filter_parse(ps);
- if (err) {
- append_filter_err(ps, filter);
- goto out;
- }
+ err = create_filter(call, filter_string, true, &filter);
- err = replace_preds(call, filter, ps, filter_string, false);
- if (err) {
- filter_disable(call);
- append_filter_err(ps, filter);
- } else
- call->flags |= TRACE_EVENT_FL_FILTERED;
-out:
/*
* Always swap the call filter with the new filter
* even if there was an error. If there was an error
* in the filter, we disable the filter and show the error
* string
*/
- tmp = call->filter;
- rcu_assign_pointer(call->filter, filter);
- if (tmp) {
- /* Make sure the call is done with the filter */
- synchronize_sched();
- __free_filter(tmp);
+ if (filter) {
+ struct event_filter *tmp = call->filter;
+
+ if (!err)
+ call->flags |= TRACE_EVENT_FL_FILTERED;
+ else
+ filter_disable(call);
+
+ rcu_assign_pointer(call->filter, filter);
+
+ if (tmp) {
+ /* Make sure the call is done with the filter */
+ synchronize_sched();
+ __free_filter(tmp);
+ }
}
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
out_unlock:
mutex_unlock(&event_mutex);
@@ -1811,7 +1902,6 @@ out_unlock:
int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string)
{
- struct filter_parse_state *ps;
struct event_filter *filter;
int err = 0;
@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
goto out_unlock;
}
- err = -ENOMEM;
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto out_unlock;
-
- filter = __alloc_filter();
- if (!filter)
- goto out;
-
- /* System filters just show a default message */
- kfree(filter->filter_string);
- filter->filter_string = NULL;
-
- /*
- * No event actually uses the system filter
- * we can free it without synchronize_sched().
- */
- __free_filter(system->filter);
- system->filter = filter;
-
- parse_init(ps, filter_ops, filter_string);
- err = filter_parse(ps);
- if (err)
- goto err_filter;
-
- err = replace_system_preds(system, ps, filter_string);
- if (err)
- goto err_filter;
-
-out:
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
+ err = create_system_filter(system, filter_string, &filter);
+ if (filter) {
+ /*
+ * No event actually uses the system filter
+ * we can free it without synchronize_sched().
+ */
+ __free_filter(system->filter);
+ system->filter = filter;
+ }
out_unlock:
mutex_unlock(&event_mutex);
return err;
-
-err_filter:
- replace_filter_string(filter, filter_string);
- append_filter_err(ps, system->filter);
- goto out;
}
#ifdef CONFIG_PERF_EVENTS
@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
{
int err;
struct event_filter *filter;
- struct filter_parse_state *ps;
struct ftrace_event_call *call;
mutex_lock(&event_mutex);
@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
if (event->filter)
goto out_unlock;
- filter = __alloc_filter();
- if (!filter) {
- err = PTR_ERR(filter);
- goto out_unlock;
- }
-
- err = -ENOMEM;
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto free_filter;
-
- parse_init(ps, filter_ops, filter_str);
- err = filter_parse(ps);
- if (err)
- goto free_ps;
-
- err = replace_preds(call, filter, ps, filter_str, false);
+ err = create_filter(call, filter_str, false, &filter);
if (!err)
event->filter = filter;
-
-free_ps:
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
-
-free_filter:
- if (err)
+ else
__free_filter(filter);
out_unlock:
@@ -1954,43 +1991,6 @@ out_unlock:
#define CREATE_TRACE_POINTS
#include "trace_events_filter_test.h"
-static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
- struct event_filter **pfilter)
-{
- struct event_filter *filter;
- struct filter_parse_state *ps;
- int err = -ENOMEM;
-
- filter = __alloc_filter();
- if (!filter)
- goto out;
-
- ps = kzalloc(sizeof(*ps), GFP_KERNEL);
- if (!ps)
- goto free_filter;
-
- parse_init(ps, filter_ops, filter_str);
- err = filter_parse(ps);
- if (err)
- goto free_ps;
-
- err = replace_preds(call, filter, ps, filter_str, false);
- if (!err)
- *pfilter = filter;
-
- free_ps:
- filter_opstack_clear(ps);
- postfix_clear(ps);
- kfree(ps);
-
- free_filter:
- if (err)
- __free_filter(filter);
-
- out:
- return err;
-}
-
#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
{ \
.filter = FILTER, \
@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
struct test_filter_data_t *d = &test_filter_data[i];
int err;
- err = test_get_filter(d->filter, &event_ftrace_test_filter,
- &filter);
+ err = create_filter(&event_ftrace_test_filter, d->filter,
+ false, &filter);
if (err) {
printk(KERN_INFO
"Failed to get filter for '%s', err %d\n",
d->filter, err);
+ __free_filter(filter);
break;
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 77575b386d9..d4545f49242 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,6 +13,9 @@
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/fs.h>
+
+#include <asm/setup.h>
+
#include "trace.h"
#define STACK_TRACE_ENTRIES 500
@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
static struct ftrace_ops trace_ops __read_mostly =
{
.func = stack_trace_call,
- .flags = FTRACE_OPS_FL_GLOBAL,
};
static ssize_t
@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
.release = seq_release,
};
+static int
+stack_trace_filter_open(struct inode *inode, struct file *file)
+{
+ return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+ inode, file);
+}
+
+static const struct file_operations stack_trace_filter_fops = {
+ .open = stack_trace_filter_open,
+ .read = seq_read,
+ .write = ftrace_filter_write,
+ .llseek = ftrace_regex_lseek,
+ .release = ftrace_regex_release,
+};
+
int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
return ret;
}
+static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
+
static __init int enable_stacktrace(char *str)
{
+ if (strncmp(str, "_filter=", 8) == 0)
+ strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+
stack_tracer_enabled = 1;
last_stack_tracer_enabled = 1;
return 1;
@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
+ trace_create_file("stack_trace_filter", 0444, d_tracer,
+ NULL, &stack_trace_filter_fops);
+
+ if (stack_trace_filter_buf[0])
+ ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
+
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);