summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/groups.c11
-rw-r--r--kernel/irq/internals.h4
-rw-r--r--kernel/irq/irqdesc.c52
-rw-r--r--kernel/irq/proc.c22
-rw-r--r--kernel/module.c170
-rw-r--r--kernel/params.c97
-rw-r--r--kernel/power/Kconfig16
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/uid16.c2
-rw-r--r--kernel/user.c1
-rw-r--r--kernel/user_namespace.c124
13 files changed, 332 insertions, 175 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 113b837470c..4c1ee7f2beb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7477,11 +7477,11 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
synchronize_rcu();
- perf_install_in_context(ctx, group_leader, event->cpu);
+ perf_install_in_context(ctx, group_leader, group_leader->cpu);
get_ctx(ctx);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
- perf_install_in_context(ctx, sibling, event->cpu);
+ perf_install_in_context(ctx, sibling, sibling->cpu);
get_ctx(ctx);
}
}
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f86cf..664411f171b 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
/* init to 2 - one for init_task, one to ensure it is never freed */
@@ -213,6 +214,14 @@ out:
return i;
}
+bool may_setgroups(void)
+{
+ struct user_namespace *user_ns = current_user_ns();
+
+ return ns_capable(user_ns, CAP_SETGID) &&
+ userns_may_setgroups(user_ns);
+}
+
/*
* SMP: Our groups are copy-on-write. We can set them safely
* without another task interfering.
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!ns_capable(current_user_ns(), CAP_SETGID))
+ if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d766619..df553b0af93 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -78,8 +78,12 @@ extern void unmask_threaded_irq(struct irq_desc *desc);
#ifdef CONFIG_SPARSE_IRQ
static inline void irq_mark_irq(unsigned int irq) { }
+extern void irq_lock_sparse(void);
+extern void irq_unlock_sparse(void);
#else
extern void irq_mark_irq(unsigned int irq);
+static inline void irq_lock_sparse(void) { }
+static inline void irq_unlock_sparse(void) { }
#endif
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a1782f88f0a..99793b9b6d2 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -132,6 +132,16 @@ static void free_masks(struct irq_desc *desc)
static inline void free_masks(struct irq_desc *desc) { }
#endif
+void irq_lock_sparse(void)
+{
+ mutex_lock(&sparse_irq_lock);
+}
+
+void irq_unlock_sparse(void)
+{
+ mutex_unlock(&sparse_irq_lock);
+}
+
static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
@@ -168,6 +178,12 @@ static void free_desc(unsigned int irq)
unregister_irq_proc(irq, desc);
+ /*
+ * sparse_irq_lock protects also show_interrupts() and
+ * kstat_irq_usr(). Once we deleted the descriptor from the
+ * sparse tree we can free it. Access in proc will fail to
+ * lookup the descriptor.
+ */
mutex_lock(&sparse_irq_lock);
delete_irq_desc(irq);
mutex_unlock(&sparse_irq_lock);
@@ -574,6 +590,15 @@ void kstat_incr_irq_this_cpu(unsigned int irq)
kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
}
+/**
+ * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu
+ * @irq: The interrupt number
+ * @cpu: The cpu number
+ *
+ * Returns the sum of interrupt counts on @cpu since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -582,6 +607,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
+/**
+ * kstat_irqs - Get the statistics for an interrupt
+ * @irq: The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
unsigned int kstat_irqs(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -594,3 +627,22 @@ unsigned int kstat_irqs(unsigned int irq)
sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
return sum;
}
+
+/**
+ * kstat_irqs_usr - Get the statistics for an interrupt
+ * @irq: The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. Contrary to kstat_irqs() this can be called from any
+ * preemptible context. It's protected against concurrent removal of
+ * an interrupt descriptor when sparse irqs are enabled.
+ */
+unsigned int kstat_irqs_usr(unsigned int irq)
+{
+ int sum;
+
+ irq_lock_sparse();
+ sum = kstat_irqs(irq);
+ irq_unlock_sparse();
+ return sum;
+}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index ac1ba2f1103..9dc9bfd8a67 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -15,6 +15,23 @@
#include "internals.h"
+/*
+ * Access rules:
+ *
+ * procfs protects read/write of /proc/irq/N/ files against a
+ * concurrent free of the interrupt descriptor. remove_proc_entry()
+ * immediately prevents new read/writes to happen and waits for
+ * already running read/write functions to complete.
+ *
+ * We remove the proc entries first and then delete the interrupt
+ * descriptor from the radix tree and free it. So it is guaranteed
+ * that irq_to_desc(N) is valid as long as the read/writes are
+ * permitted by procfs.
+ *
+ * The read from /proc/interrupts is a different problem because there
+ * is no protection. So the lookup and the access to irqdesc
+ * information must be protected by sparse_irq_lock.
+ */
static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
@@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
}
+ irq_lock_sparse();
desc = irq_to_desc(i);
if (!desc)
- return 0;
+ goto outsparse;
raw_spin_lock_irqsave(&desc->lock, flags);
for_each_online_cpu(j)
@@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
out:
raw_spin_unlock_irqrestore(&desc->lock, flags);
+outsparse:
+ irq_unlock_sparse();
return 0;
}
#endif
diff --git a/kernel/module.c b/kernel/module.c
index e52a8739361..3965511ae13 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -42,7 +42,6 @@
#include <linux/vermagic.h>
#include <linux/notifier.h>
#include <linux/sched.h>
-#include <linux/stop_machine.h>
#include <linux/device.h>
#include <linux/string.h>
#include <linux/mutex.h>
@@ -98,7 +97,7 @@
* 1) List of modules (also safely readable with preempt_disable),
* 2) module_use links,
* 3) module_addr_min/module_addr_max.
- * (delete uses stop_machine/add uses RCU list operations). */
+ * (delete and add uses RCU list operations). */
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
@@ -158,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
* Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-int register_module_notifier(struct notifier_block * nb)
+int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
}
EXPORT_SYMBOL(register_module_notifier);
-int unregister_module_notifier(struct notifier_block * nb)
+int unregister_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&module_notify_list, nb);
}
@@ -628,18 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
EXPORT_TRACEPOINT_SYMBOL(module_get);
+/* MODULE_REF_BASE is the base reference count by kmodule loader. */
+#define MODULE_REF_BASE 1
+
/* Init the unload section of the module. */
static int module_unload_init(struct module *mod)
{
- mod->refptr = alloc_percpu(struct module_ref);
- if (!mod->refptr)
- return -ENOMEM;
+ /*
+ * Initialize reference counter to MODULE_REF_BASE.
+ * refcnt == 0 means module is going.
+ */
+ atomic_set(&mod->refcnt, MODULE_REF_BASE);
INIT_LIST_HEAD(&mod->source_list);
INIT_LIST_HEAD(&mod->target_list);
/* Hold reference count during initialization. */
- raw_cpu_write(mod->refptr->incs, 1);
+ atomic_inc(&mod->refcnt);
return 0;
}
@@ -721,8 +725,6 @@ static void module_unload_free(struct module *mod)
kfree(use);
}
mutex_unlock(&module_mutex);
-
- free_percpu(mod->refptr);
}
#ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -740,60 +742,39 @@ static inline int try_force_unload(unsigned int flags)
}
#endif /* CONFIG_MODULE_FORCE_UNLOAD */
-struct stopref
+/* Try to release refcount of module, 0 means success. */
+static int try_release_module_ref(struct module *mod)
{
- struct module *mod;
- int flags;
- int *forced;
-};
+ int ret;
-/* Whole machine is stopped with interrupts off when this runs. */
-static int __try_stop_module(void *_sref)
-{
- struct stopref *sref = _sref;
+ /* Try to decrement refcnt which we set at loading */
+ ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt);
+ BUG_ON(ret < 0);
+ if (ret)
+ /* Someone can put this right now, recover with checking */
+ ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0);
+
+ return ret;
+}
+static int try_stop_module(struct module *mod, int flags, int *forced)
+{
/* If it's not unused, quit unless we're forcing. */
- if (module_refcount(sref->mod) != 0) {
- if (!(*sref->forced = try_force_unload(sref->flags)))
+ if (try_release_module_ref(mod) != 0) {
+ *forced = try_force_unload(flags);
+ if (!(*forced))
return -EWOULDBLOCK;
}
/* Mark it as dying. */
- sref->mod->state = MODULE_STATE_GOING;
- return 0;
-}
-
-static int try_stop_module(struct module *mod, int flags, int *forced)
-{
- struct stopref sref = { mod, flags, forced };
+ mod->state = MODULE_STATE_GOING;
- return stop_machine(__try_stop_module, &sref, NULL);
+ return 0;
}
unsigned long module_refcount(struct module *mod)
{
- unsigned long incs = 0, decs = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- decs += per_cpu_ptr(mod->refptr, cpu)->decs;
- /*
- * ensure the incs are added up after the decs.
- * module_put ensures incs are visible before decs with smp_wmb.
- *
- * This 2-count scheme avoids the situation where the refcount
- * for CPU0 is read, then CPU0 increments the module refcount,
- * then CPU1 drops that refcount, then the refcount for CPU1 is
- * read. We would record a decrement but not its corresponding
- * increment so we would see a low count (disaster).
- *
- * Rare situation? But module_refcount can be preempted, and we
- * might be tallying up 4096+ CPUs. So it is not impossible.
- */
- smp_rmb();
- for_each_possible_cpu(cpu)
- incs += per_cpu_ptr(mod->refptr, cpu)->incs;
- return incs - decs;
+ return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE;
}
EXPORT_SYMBOL(module_refcount);
@@ -877,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
seq_printf(m, " %lu ", module_refcount(mod));
- /* Always include a trailing , so userspace can differentiate
- between this and the old multi-field proc format. */
+ /*
+ * Always include a trailing , so userspace can differentiate
+ * between this and the old multi-field proc format.
+ */
list_for_each_entry(use, &mod->source_list, source_list) {
printed_something = 1;
seq_printf(m, "%s,", use->source->name);
@@ -886,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
if (mod->init != NULL && mod->exit == NULL) {
printed_something = 1;
- seq_printf(m, "[permanent],");
+ seq_puts(m, "[permanent],");
}
if (!printed_something)
- seq_printf(m, "-");
+ seq_puts(m, "-");
}
void __symbol_put(const char *symbol)
@@ -935,7 +918,7 @@ void __module_get(struct module *module)
{
if (module) {
preempt_disable();
- __this_cpu_inc(module->refptr->incs);
+ atomic_inc(&module->refcnt);
trace_module_get(module, _RET_IP_);
preempt_enable();
}
@@ -948,11 +931,11 @@ bool try_module_get(struct module *module)
if (module) {
preempt_disable();
-
- if (likely(module_is_live(module))) {
- __this_cpu_inc(module->refptr->incs);
+ /* Note: here, we can fail to get a reference */
+ if (likely(module_is_live(module) &&
+ atomic_inc_not_zero(&module->refcnt) != 0))
trace_module_get(module, _RET_IP_);
- } else
+ else
ret = false;
preempt_enable();
@@ -963,11 +946,12 @@ EXPORT_SYMBOL(try_module_get);
void module_put(struct module *module)
{
+ int ret;
+
if (module) {
preempt_disable();
- smp_wmb(); /* see comment in module_refcount */
- __this_cpu_inc(module->refptr->decs);
-
+ ret = atomic_dec_if_positive(&module->refcnt);
+ WARN_ON(ret < 0); /* Failed to put refcount */
trace_module_put(module, _RET_IP_);
preempt_enable();
}
@@ -978,7 +962,7 @@ EXPORT_SYMBOL(module_put);
static inline void print_unload_info(struct seq_file *m, struct module *mod)
{
/* We don't know the usage count, or what modules are using. */
- seq_printf(m, " - -");
+ seq_puts(m, " - -");
}
static inline void module_unload_free(struct module *mod)
@@ -1131,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc,
static int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
- struct module *mod,
+ struct module *mod,
const unsigned long *crc,
const struct module *crc_owner)
{
@@ -1165,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs,
return 0;
bad_version:
- printk("%s: disagrees about version of symbol %s\n",
+ pr_warn("%s: disagrees about version of symbol %s\n",
mod->name, symname);
return 0;
}
@@ -1200,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
static inline int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
- struct module *mod,
+ struct module *mod,
const unsigned long *crc,
const struct module *crc_owner)
{
@@ -1288,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect)
return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
}
-struct module_sect_attr
-{
+struct module_sect_attr {
struct module_attribute mattr;
char *name;
unsigned long address;
};
-struct module_sect_attrs
-{
+struct module_sect_attrs {
struct attribute_group grp;
unsigned int nsections;
struct module_sect_attr attrs[0];
@@ -1550,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod)
(attr->test && attr->test(mod))) {
memcpy(temp_attr, attr, sizeof(*temp_attr));
sysfs_attr_init(&temp_attr->attr);
- error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
+ error = sysfs_create_file(&mod->mkobj.kobj,
+ &temp_attr->attr);
++temp_attr;
}
}
@@ -1566,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod)
/* pick a field to test for end of list */
if (!attr->attr.name)
break;
- sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
+ sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
if (attr->free)
attr->free(mod);
}
@@ -1697,18 +1680,6 @@ static void mod_sysfs_teardown(struct module *mod)
mod_sysfs_fini(mod);
}
-/*
- * unlink the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __unlink_module(void *_mod)
-{
- struct module *mod = _mod;
- list_del(&mod->list);
- module_bug_cleanup(mod);
- return 0;
-}
-
#ifdef CONFIG_DEBUG_SET_MODULE_RONX
/*
* LKM RO/NX protection: protect module's text/ro-data
@@ -1860,7 +1831,12 @@ static void free_module(struct module *mod)
/* Now we can delete it from the lists */
mutex_lock(&module_mutex);
- stop_machine(__unlink_module, mod, NULL);
+ /* Unlink carefully: kallsyms could be walking list. */
+ list_del_rcu(&mod->list);
+ /* Remove this module from bug list, this uses list_del_rcu */
+ module_bug_cleanup(mod);
+ /* Wait for RCU synchronizing before releasing mod->list and buglist. */
+ synchronize_rcu();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@@ -1955,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);
- printk("%s: please compile with -fno-common\n",
+ pr_warn("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
break;
@@ -2259,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info)
}
static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
- unsigned int shnum)
+ unsigned int shnum)
{
const Elf_Shdr *sec;
@@ -2735,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
* This shouldn't happen with same compiler and binutils
* building all parts of the module.
*/
- printk(KERN_WARNING "%s: has both .ctors and .init_array.\n",
+ pr_warn("%s: has both .ctors and .init_array.\n",
mod->name);
return -EINVAL;
}
@@ -3023,8 +2999,10 @@ static int do_init_module(struct module *mod)
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
if (ret < 0) {
- /* Init routine failed: abort. Try to protect us from
- buggy refcounters. */
+ /*
+ * Init routine failed: abort. Try to protect us from
+ * buggy refcounters.
+ */
mod->state = MODULE_STATE_GOING;
synchronize_sched();
module_put(mod);
@@ -3202,7 +3180,7 @@ out:
static int unknown_module_param_cb(char *param, char *val, const char *modname)
{
- /* Check for magic 'dyndbg' arg */
+ /* Check for magic 'dyndbg' arg */
int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
if (ret != 0)
pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
@@ -3352,6 +3330,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
+ /* Wait for RCU synchronizing before releasing mod->list. */
+ synchronize_rcu();
mutex_unlock(&module_mutex);
free_module:
module_deallocate(mod, info);
@@ -3685,8 +3665,8 @@ static int m_show(struct seq_file *m, void *p)
/* Informative for users. */
seq_printf(m, " %s",
- mod->state == MODULE_STATE_GOING ? "Unloading":
- mod->state == MODULE_STATE_COMING ? "Loading":
+ mod->state == MODULE_STATE_GOING ? "Unloading" :
+ mod->state == MODULE_STATE_COMING ? "Loading" :
"Live");
/* Used by oprofile and other similar tools. */
seq_printf(m, " 0x%pK", mod->module_core);
@@ -3695,7 +3675,7 @@ static int m_show(struct seq_file *m, void *p)
if (mod->taints)
seq_printf(m, " %s", module_flags(mod, buf));
- seq_printf(m, "\n");
+ seq_puts(m, "\n");
return 0;
}
diff --git a/kernel/params.c b/kernel/params.c
index db97b791390..0af9b2c4e56 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -603,74 +603,67 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
const struct kernel_param *kp,
const char *name)
{
- struct module_param_attrs *new;
- struct attribute **attrs;
- int err, num;
+ struct module_param_attrs *new_mp;
+ struct attribute **new_attrs;
+ unsigned int i;
/* We don't bother calling this with invisible parameters. */
BUG_ON(!kp->perm);
if (!mk->mp) {
- num = 0;
- attrs = NULL;
- } else {
- num = mk->mp->num;
- attrs = mk->mp->grp.attrs;
+ /* First allocation. */
+ mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL);
+ if (!mk->mp)
+ return -ENOMEM;
+ mk->mp->grp.name = "parameters";
+ /* NULL-terminated attribute array. */
+ mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]),
+ GFP_KERNEL);
+ /* Caller will cleanup via free_module_param_attrs */
+ if (!mk->mp->grp.attrs)
+ return -ENOMEM;
}
- /* Enlarge. */
- new = krealloc(mk->mp,
- sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
- GFP_KERNEL);
- if (!new) {
- kfree(attrs);
- err = -ENOMEM;
- goto fail;
- }
- /* Despite looking like the typical realloc() bug, this is safe.
- * We *want* the old 'attrs' to be freed either way, and we'll store
- * the new one in the success case. */
- attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
- if (!attrs) {
- err = -ENOMEM;
- goto fail_free_new;
- }
+ /* Enlarge allocations. */
+ new_mp = krealloc(mk->mp,
+ sizeof(*mk->mp) +
+ sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1),
+ GFP_KERNEL);
+ if (!new_mp)
+ return -ENOMEM;
+ mk->mp = new_mp;
- /* Sysfs wants everything zeroed. */
- memset(new, 0, sizeof(*new));
- memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
- memset(&attrs[num], 0, sizeof(attrs[num]));
- new->grp.name = "parameters";
- new->grp.attrs = attrs;
+ /* Extra pointer for NULL terminator */
+ new_attrs = krealloc(mk->mp->grp.attrs,
+ sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2),
+ GFP_KERNEL);
+ if (!new_attrs)
+ return -ENOMEM;
+ mk->mp->grp.attrs = new_attrs;
/* Tack new one on the end. */
- sysfs_attr_init(&new->attrs[num].mattr.attr);
- new->attrs[num].param = kp;
- new->attrs[num].mattr.show = param_attr_show;
- new->attrs[num].mattr.store = param_attr_store;
- new->attrs[num].mattr.attr.name = (char *)name;
- new->attrs[num].mattr.attr.mode = kp->perm;
- new->num = num+1;
+ sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr);
+ mk->mp->attrs[mk->mp->num].param = kp;
+ mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show;
+ /* Do not allow runtime DAC changes to make param writable. */
+ if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
+ mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store;
+ mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name;
+ mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm;
+ mk->mp->num++;
/* Fix up all the pointers, since krealloc can move us */
- for (num = 0; num < new->num; num++)
- new->grp.attrs[num] = &new->attrs[num].mattr.attr;
- new->grp.attrs[num] = NULL;
-
- mk->mp = new;
+ for (i = 0; i < mk->mp->num; i++)
+ mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr;
+ mk->mp->grp.attrs[mk->mp->num] = NULL;
return 0;
-
-fail_free_new:
- kfree(new);
-fail:
- mk->mp = NULL;
- return err;
}
#ifdef CONFIG_MODULES
static void free_module_param_attrs(struct module_kobject *mk)
{
- kfree(mk->mp->grp.attrs);
+ if (mk->mp)
+ kfree(mk->mp->grp.attrs);
kfree(mk->mp);
mk->mp = NULL;
}
@@ -695,8 +688,10 @@ int module_param_sysfs_setup(struct module *mod,
if (kparam[i].perm == 0)
continue;
err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
- if (err)
+ if (err) {
+ free_module_param_attrs(&mod->mkobj);
return err;
+ }
params = true;
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6e7708c2c21..48b28d387c7 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -94,7 +94,7 @@ config PM_STD_PARTITION
config PM_SLEEP
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
- select PM_RUNTIME
+ select PM
config PM_SLEEP_SMP
def_bool y
@@ -130,23 +130,19 @@ config PM_WAKELOCKS_GC
depends on PM_WAKELOCKS
default y
-config PM_RUNTIME
- bool "Run-time PM core functionality"
+config PM
+ bool "Device power management core functionality"
---help---
Enable functionality allowing I/O devices to be put into energy-saving
- (low power) states at run time (or autosuspended) after a specified
- period of inactivity and woken up in response to a hardware-generated
+ (low power) states, for example after a specified period of inactivity
+ (autosuspended), and woken up in response to a hardware-generated
wake-up event or a driver's request.
Hardware support is generally required for this functionality to work
and the bus type drivers of the buses the devices are on are
- responsible for the actual handling of the autosuspend requests and
+ responsible for the actual handling of device suspend requests and
wake-up events.
-config PM
- def_bool y
- depends on PM_SLEEP || PM_RUNTIME
-
config PM_DEBUG
bool "Power Management Debug Support"
depends on PM
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4d54b754058..1363d58f07e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -847,7 +847,6 @@ void tick_nohz_idle_enter(void)
local_irq_enable();
}
-EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
/**
* tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -974,7 +973,6 @@ void tick_nohz_idle_exit(void)
local_irq_enable();
}
-EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
{
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 67d6369ddf8..979ccde2672 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -55,7 +55,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
-ifeq ($(CONFIG_PM_RUNTIME),y)
+ifeq ($(CONFIG_PM),y)
obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
endif
ifeq ($(CONFIG_TRACING),y)
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 602e5bbbcef..d58cc4d8f0d 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!ns_capable(current_user_ns(), CAP_SETGID))
+ if (!may_setgroups())
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 69b800aebf1..b069ccbfb0b 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -54,6 +54,7 @@ struct user_namespace init_user_ns = {
#ifdef CONFIG_USER_NS
.ns.ops = &userns_operations,
#endif
+ .flags = USERNS_INIT_FLAGS,
#ifdef CONFIG_PERSISTENT_KEYRINGS
.persistent_keyring_register_sem =
__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 1491ad00388..4109f832068 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,7 @@
#include <linux/fs_struct.h>
static struct kmem_cache *user_ns_cachep __read_mostly;
+static DEFINE_MUTEX(userns_state_mutex);
static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
@@ -100,6 +101,11 @@ int create_user_ns(struct cred *new)
ns->owner = owner;
ns->group = group;
+ /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
+ mutex_lock(&userns_state_mutex);
+ ns->flags = parent_ns->flags;
+ mutex_unlock(&userns_state_mutex);
+
set_cred_user_ns(new, ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -584,9 +590,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
return false;
}
-
-static DEFINE_MUTEX(id_map_mutex);
-
static ssize_t map_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos,
int cap_setid,
@@ -603,7 +606,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
ssize_t ret = -EINVAL;
/*
- * The id_map_mutex serializes all writes to any given map.
+ * The userns_state_mutex serializes all writes to any given map.
*
* Any map is only ever written once.
*
@@ -621,7 +624,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
* order and smp_rmb() is guaranteed that we don't have crazy
* architectures returning stale data.
*/
- mutex_lock(&id_map_mutex);
+ mutex_lock(&userns_state_mutex);
ret = -EPERM;
/* Only allow one successful write to the map */
@@ -641,7 +644,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if (!page)
goto out;
- /* Only allow <= page size writes at the beginning of the file */
+ /* Only allow < page size writes at the beginning of the file */
ret = -EINVAL;
if ((*ppos != 0) || (count >= PAGE_SIZE))
goto out;
@@ -751,7 +754,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
*ppos = count;
ret = count;
out:
- mutex_unlock(&id_map_mutex);
+ mutex_unlock(&userns_state_mutex);
if (page)
free_page(page);
return ret;
@@ -813,16 +816,21 @@ static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
struct uid_gid_map *new_map)
{
- /* Allow mapping to your own filesystem ids */
- if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+ const struct cred *cred = file->f_cred;
+ /* Don't allow mappings that would allow anything that wouldn't
+ * be allowed without the establishment of unprivileged mappings.
+ */
+ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
+ uid_eq(ns->owner, cred->euid)) {
u32 id = new_map->extent[0].lower_first;
if (cap_setid == CAP_SETUID) {
kuid_t uid = make_kuid(ns->parent, id);
- if (uid_eq(uid, file->f_cred->fsuid))
+ if (uid_eq(uid, cred->euid))
return true;
} else if (cap_setid == CAP_SETGID) {
kgid_t gid = make_kgid(ns->parent, id);
- if (gid_eq(gid, file->f_cred->fsgid))
+ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
+ gid_eq(gid, cred->egid))
return true;
}
}
@@ -842,6 +850,100 @@ static bool new_idmap_permitted(const struct file *file,
return false;
}
+int proc_setgroups_show(struct seq_file *seq, void *v)
+{
+ struct user_namespace *ns = seq->private;
+ unsigned long userns_flags = ACCESS_ONCE(ns->flags);
+
+ seq_printf(seq, "%s\n",
+ (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
+ "allow" : "deny");
+ return 0;
+}
+
+ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ char kbuf[8], *pos;
+ bool setgroups_allowed;
+ ssize_t ret;
+
+ /* Only allow a very narrow range of strings to be written */
+ ret = -EINVAL;
+ if ((*ppos != 0) || (count >= sizeof(kbuf)))
+ goto out;
+
+ /* What was written? */
+ ret = -EFAULT;
+ if (copy_from_user(kbuf, buf, count))
+ goto out;
+ kbuf[count] = '\0';
+ pos = kbuf;
+
+ /* What is being requested? */
+ ret = -EINVAL;
+ if (strncmp(pos, "allow", 5) == 0) {
+ pos += 5;
+ setgroups_allowed = true;
+ }
+ else if (strncmp(pos, "deny", 4) == 0) {
+ pos += 4;
+ setgroups_allowed = false;
+ }
+ else
+ goto out;
+
+ /* Verify there is not trailing junk on the line */
+ pos = skip_spaces(pos);
+ if (*pos != '\0')
+ goto out;
+
+ ret = -EPERM;
+ mutex_lock(&userns_state_mutex);
+ if (setgroups_allowed) {
+ /* Enabling setgroups after setgroups has been disabled
+ * is not allowed.
+ */
+ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
+ goto out_unlock;
+ } else {
+ /* Permanently disabling setgroups after setgroups has
+ * been enabled by writing the gid_map is not allowed.
+ */
+ if (ns->gid_map.nr_extents != 0)
+ goto out_unlock;
+ ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
+ }
+ mutex_unlock(&userns_state_mutex);
+
+ /* Report a successful write */
+ *ppos = count;
+ ret = count;
+out:
+ return ret;
+out_unlock:
+ mutex_unlock(&userns_state_mutex);
+ goto out;
+}
+
+bool userns_may_setgroups(const struct user_namespace *ns)
+{
+ bool allowed;
+
+ mutex_lock(&userns_state_mutex);
+ /* It is not safe to use setgroups until a gid mapping in
+ * the user namespace has been established.
+ */
+ allowed = ns->gid_map.nr_extents != 0;
+ /* Is setgroups allowed? */
+ allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
+ mutex_unlock(&userns_state_mutex);
+
+ return allowed;
+}
+
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
return container_of(ns, struct user_namespace, ns);