Diffstat (limited to 'kernel')
38 files changed, 362 insertions, 419 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 35ef1185e35..1ce47553fb0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -26,6 +26,7 @@ obj-y += sched/ obj-y += power/ obj-y += printk/ obj-y += cpu/ +obj-y += irq/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@ -79,7 +80,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o diff --git a/kernel/capability.c b/kernel/capability.c index 6fc1c8af44d..4e66bf9275b 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -452,3 +452,4 @@ bool inode_capable(const struct inode *inode, int cap) return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid); } +EXPORT_SYMBOL(inode_capable); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e0aeb32415f..2418b6e71a8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -60,6 +60,7 @@ #include <linux/poll.h> #include <linux/flex_array.h> /* used in cgroup_attach_task */ #include <linux/kthread.h> +#include <linux/file.h> #include <linux/atomic.h> @@ -4034,8 +4035,8 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, struct cgroup_event *event; struct cgroup_subsys_state *cfile_css; unsigned int efd, cfd; - struct file *efile; - struct file *cfile; + struct fd efile; + struct fd cfile; char *endp; int ret; @@ -4058,31 +4059,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, init_waitqueue_func_entry(&event->wait, cgroup_event_wake); INIT_WORK(&event->remove, cgroup_event_remove); - efile = eventfd_fget(efd); - if (IS_ERR(efile)) { - ret = PTR_ERR(efile); + efile = fdget(efd); + if (!efile.file) { + ret = -EBADF; goto out_kfree; } - event->eventfd = eventfd_ctx_fileget(efile); + event->eventfd = eventfd_ctx_fileget(efile.file); if (IS_ERR(event->eventfd)) { ret = PTR_ERR(event->eventfd); goto out_put_efile; } - cfile = fget(cfd); - if (!cfile) { + cfile = fdget(cfd); + if (!cfile.file) { ret = -EBADF; goto out_put_eventfd; } /* the process need read permission on control file */ /* AV: shouldn't we check that it's been opened for read instead? 
*/ - ret = inode_permission(file_inode(cfile), MAY_READ); + ret = inode_permission(file_inode(cfile.file), MAY_READ); if (ret < 0) goto out_put_cfile; - event->cft = __file_cft(cfile); + event->cft = __file_cft(cfile.file); if (IS_ERR(event->cft)) { ret = PTR_ERR(event->cft); goto out_put_cfile; @@ -4103,7 +4104,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, ret = -EINVAL; event->css = cgroup_css(cgrp, event->cft->ss); - cfile_css = css_from_dir(cfile->f_dentry->d_parent, event->cft->ss); + cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss); if (event->css && event->css == cfile_css && css_tryget(event->css)) ret = 0; @@ -4121,25 +4122,25 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css, if (ret) goto out_put_css; - efile->f_op->poll(efile, &event->pt); + efile.file->f_op->poll(efile.file, &event->pt); spin_lock(&cgrp->event_list_lock); list_add(&event->list, &cgrp->event_list); spin_unlock(&cgrp->event_list_lock); - fput(cfile); - fput(efile); + fdput(cfile); + fdput(efile); return 0; out_put_css: css_put(event->css); out_put_cfile: - fput(cfile); + fdput(cfile); out_put_eventfd: eventfd_ctx_put(event->eventfd); out_put_efile: - fput(efile); + fdput(efile); out_kfree: kfree(event); diff --git a/kernel/events/core.c b/kernel/events/core.c index 2207efc941d..dd236b66ca3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5039,6 +5039,7 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.header.size += sizeof(mmap_event->maj); mmap_event->event_id.header.size += sizeof(mmap_event->min); mmap_event->event_id.header.size += sizeof(mmap_event->ino); + mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation); } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f3569747d62..ad8e1bdca70 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1682,12 +1682,10 @@ static bool handle_trampoline(struct pt_regs *regs) tmp = ri; ri = ri->next; kfree(tmp); + utask->depth--; if (!chained) break; - - utask->depth--; - BUG_ON(!ri); } diff --git a/kernel/extable.c b/kernel/extable.c index 67460b93b1a..832cb28105b 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -41,7 +41,7 @@ u32 __initdata main_extable_sort_needed = 1; /* Sort the kernel's built-in exception table */ void __init sort_main_extable(void) { - if (main_extable_sort_needed) { + if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) { pr_notice("Sorting __ex_table...\n"); sort_extable(__start___ex_table, __stop___ex_table); } diff --git a/kernel/fork.c b/kernel/fork.c index c9eaf201300..086fe73ad6b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; - struct mempolicy *pol; uprobe_start_dup_mmap(); down_write(&oldmm->mmap_sem); @@ -400,11 +399,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem; *tmp = *mpnt; INIT_LIST_HEAD(&tmp->anon_vma_chain); - pol = mpol_dup(vma_policy(mpnt)); - retval = PTR_ERR(pol); - if (IS_ERR(pol)) + retval = vma_dup_policy(mpnt, tmp); + if (retval) goto fail_nomem_policy; - vma_set_policy(tmp, pol); tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; @@ -472,7 +469,7 @@ out: uprobe_end_dup_mmap(); return retval; 
fail_nomem_anon_vma_fork: - mpol_put(pol); + mpol_put(vma_policy(tmp)); fail_nomem_policy: kmem_cache_free(vm_area_cachep, tmp); fail_nomem: @@ -522,7 +519,7 @@ static void mm_init_aio(struct mm_struct *mm) { #ifdef CONFIG_AIO spin_lock_init(&mm->ioctx_lock); - INIT_HLIST_HEAD(&mm->ioctx_list); + mm->ioctx_table = NULL; #endif } @@ -1173,13 +1170,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, return ERR_PTR(-EINVAL); /* - * If the new process will be in a different pid namespace - * don't allow the creation of threads. + * If the new process will be in a different pid or user namespace + * do not allow it to share a thread group or signal handlers or + * parent with the forking task. */ - if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != - current->nsproxy->pid_ns_for_children)) - return ERR_PTR(-EINVAL); + if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) { + if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || + (task_active_pid_ns(current) != + current->nsproxy->pid_ns_for_children)) + return ERR_PTR(-EINVAL); + } retval = security_task_create(clone_flags); if (retval) @@ -1576,15 +1576,6 @@ long do_fork(unsigned long clone_flags, long nr; /* - * Do some preliminary argument and permissions checking before we - * actually start allocating stuff - */ - if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { - if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) - return -EINVAL; - } - - /* * Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly * requested, no event is reported; otherwise, report if the event diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 9bd0934f6c3..7a7d2ee96d4 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -74,7 +74,7 @@ static int __init gcov_persist_setup(char *str) { unsigned long val; - if (strict_strtoul(str, 0, &val)) { + if (kstrtoul(str, 0, &val)) { pr_warning("invalid gcov_persist parameter '%s'\n", str); return 0; } diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d1a758bc972..4a1fef09f65 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -1,15 +1,4 @@ -# Select this to activate the generic irq options below -config HAVE_GENERIC_HARDIRQS - bool - -if HAVE_GENERIC_HARDIRQS menu "IRQ subsystem" -# -# Interrupt subsystem related configuration options -# -config GENERIC_HARDIRQS - def_bool y - # Options selectable by the architecture code # Make sparse irq Kconfig switch below available @@ -84,4 +73,3 @@ config SPARSE_IRQ If you don't know what to do here, say N. 
endmenu -endif diff --git a/kernel/kexec.c b/kernel/kexec.c index 59f7b55ba74..2a74f307c5e 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1474,11 +1474,8 @@ static int __init __parse_crashkernel(char *cmdline, if (first_colon && (!first_space || first_colon < first_space)) return parse_crashkernel_mem(ck_cmdline, system_ram, crash_size, crash_base); - else - return parse_crashkernel_simple(ck_cmdline, crash_size, - crash_base); - return 0; + return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); } /* diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6e33498d665..a0d367a4912 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -112,6 +112,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = { struct kprobe_insn_page { struct list_head list; kprobe_opcode_t *insns; /* Page of instruction slots */ + struct kprobe_insn_cache *cache; int nused; int ngarbage; char slot_used[]; @@ -121,12 +122,6 @@ struct kprobe_insn_page { (offsetof(struct kprobe_insn_page, slot_used) + \ (sizeof(char) * (slots))) -struct kprobe_insn_cache { - struct list_head pages; /* list of kprobe_insn_page */ - size_t insn_size; /* size of instruction slot */ - int nr_garbage; -}; - static int slots_per_page(struct kprobe_insn_cache *c) { return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t)); @@ -138,8 +133,20 @@ enum kprobe_slot_state { SLOT_USED = 2, }; -static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_slots */ -static struct kprobe_insn_cache kprobe_insn_slots = { +static void *alloc_insn_page(void) +{ + return module_alloc(PAGE_SIZE); +} + +static void free_insn_page(void *page) +{ + module_free(NULL, page); +} + +struct kprobe_insn_cache kprobe_insn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex), + .alloc = alloc_insn_page, + .free = free_insn_page, .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages), .insn_size = MAX_INSN_SIZE, .nr_garbage = 0, @@ -150,10 +157,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c); * __get_insn_slot() - Find a slot on an executable page for an instruction. * We allocate an executable page if there's no room on existing ones. */ -static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) +kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) { struct kprobe_insn_page *kip; + kprobe_opcode_t *slot = NULL; + mutex_lock(&c->mutex); retry: list_for_each_entry(kip, &c->pages, list) { if (kip->nused < slots_per_page(c)) { @@ -162,7 +171,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) if (kip->slot_used[i] == SLOT_CLEAN) { kip->slot_used[i] = SLOT_USED; kip->nused++; - return kip->insns + (i * c->insn_size); + slot = kip->insns + (i * c->insn_size); + goto out; } } /* kip->nused is broken. Fix it. */ @@ -178,37 +188,29 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) /* All out of space. Need to allocate a new page. */ kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL); if (!kip) - return NULL; + goto out; /* * Use module_alloc so this page is within +/- 2GB of where the * kernel image and loaded module images reside. This is required * so x86_64 can correctly handle the %rip-relative fixups. 
*/ - kip->insns = module_alloc(PAGE_SIZE); + kip->insns = c->alloc(); if (!kip->insns) { kfree(kip); - return NULL; + goto out; } INIT_LIST_HEAD(&kip->list); memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c)); kip->slot_used[0] = SLOT_USED; kip->nused = 1; kip->ngarbage = 0; + kip->cache = c; list_add(&kip->list, &c->pages); - return kip->insns; -} - - -kprobe_opcode_t __kprobes *get_insn_slot(void) -{ - kprobe_opcode_t *ret = NULL; - - mutex_lock(&kprobe_insn_mutex); - ret = __get_insn_slot(&kprobe_insn_slots); - mutex_unlock(&kprobe_insn_mutex); - - return ret; + slot = kip->insns; +out: + mutex_unlock(&c->mutex); + return slot; } /* Return 1 if all garbages are collected, otherwise 0. */ @@ -225,7 +227,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) */ if (!list_is_singular(&kip->list)) { list_del(&kip->list); - module_free(NULL, kip->insns); + kip->cache->free(kip->insns); kfree(kip); } return 1; @@ -255,11 +257,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) return 0; } -static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, - kprobe_opcode_t *slot, int dirty) +void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, + kprobe_opcode_t *slot, int dirty) { struct kprobe_insn_page *kip; + mutex_lock(&c->mutex); list_for_each_entry(kip, &c->pages, list) { long idx = ((long)slot - (long)kip->insns) / (c->insn_size * sizeof(kprobe_opcode_t)); @@ -272,45 +275,25 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, collect_garbage_slots(c); } else collect_one_slot(kip, idx); - return; + goto out; } } /* Could not free this slot. */ WARN_ON(1); +out: + mutex_unlock(&c->mutex); } -void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) -{ - mutex_lock(&kprobe_insn_mutex); - __free_insn_slot(&kprobe_insn_slots, slot, dirty); - mutex_unlock(&kprobe_insn_mutex); -} #ifdef CONFIG_OPTPROBES /* For optimized_kprobe buffer */ -static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */ -static struct kprobe_insn_cache kprobe_optinsn_slots = { +struct kprobe_insn_cache kprobe_optinsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), + .alloc = alloc_insn_page, + .free = free_insn_page, .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), /* .insn_size is initialized later */ .nr_garbage = 0, }; -/* Get a slot for optimized_kprobe buffer */ -kprobe_opcode_t __kprobes *get_optinsn_slot(void) -{ - kprobe_opcode_t *ret = NULL; - - mutex_lock(&kprobe_optinsn_mutex); - ret = __get_insn_slot(&kprobe_optinsn_slots); - mutex_unlock(&kprobe_optinsn_mutex); - - return ret; -} - -void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty) -{ - mutex_lock(&kprobe_optinsn_mutex); - __free_insn_slot(&kprobe_optinsn_slots, slot, dirty); - mutex_unlock(&kprobe_optinsn_mutex); -} #endif #endif diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 6ada93c23a9..9659d38e008 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -113,7 +113,7 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj, unsigned long cnt; int ret; - if (strict_strtoul(buf, 0, &cnt)) + if (kstrtoul(buf, 0, &cnt)) return -EINVAL; ret = crash_shrink_memory(cnt); diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c index 2b6e69909c3..7cbd4507a7e 100644 --- a/kernel/modsign_pubkey.c +++ b/kernel/modsign_pubkey.c @@ -18,14 +18,14 @@ struct key *modsign_keyring; -extern __initdata const u8 modsign_certificate_list[]; -extern __initdata const u8 modsign_certificate_list_end[]; +extern 
__initconst const u8 modsign_certificate_list[]; +extern __initconst const u8 modsign_certificate_list_end[]; /* * We need to make sure ccache doesn't cache the .o file as it doesn't notice * if modsign.pub changes. */ -static __initdata const char annoy_ccache[] = __TIME__ "foo"; +static __initconst const char annoy_ccache[] = __TIME__ "foo"; /* * Load the compiled-in keys diff --git a/kernel/panic.c b/kernel/panic.c index 80186460051..b6c482ccc5d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -123,10 +123,14 @@ void panic(const char *fmt, ...) */ smp_send_stop(); - kmsg_dump(KMSG_DUMP_PANIC); - + /* + * Run any panic handlers, including those that might need to + * add information to the kmsg dump output. + */ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + kmsg_dump(KMSG_DUMP_PANIC); + bust_spinlocks(0); if (!panic_blink) diff --git a/kernel/params.c b/kernel/params.c index 501bde4f3be..81c4e78c8f4 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -253,13 +253,13 @@ int parse_args(const char *doing, EXPORT_SYMBOL(param_ops_##name) -STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, strict_strtoul); -STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); -STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul); -STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol); -STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); -STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); -STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); +STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul); +STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtoul); +STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul); +STANDARD_PARAM_DEF(int, int, "%i", long, kstrtoul); +STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul); +STANDARD_PARAM_DEF(long, long, "%li", long, kstrtoul); +STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul); int param_set_charp(const char *val, const struct kernel_param *kp) { diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 3085e62a80a..c9c759d5a15 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -644,22 +644,23 @@ int hibernate(void) if (error) goto Exit; - /* Allocate memory management structures */ - error = create_basic_memory_bitmaps(); - if (error) - goto Exit; - printk(KERN_INFO "PM: Syncing filesystems ... 
"); sys_sync(); printk("done.\n"); error = freeze_processes(); if (error) - goto Free_bitmaps; + goto Exit; + + lock_device_hotplug(); + /* Allocate memory management structures */ + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (error || freezer_test_done) - goto Thaw; + goto Free_bitmaps; if (in_suspend) { unsigned int flags = 0; @@ -682,14 +683,14 @@ int hibernate(void) pr_debug("PM: Image restored successfully.\n"); } + Free_bitmaps: + free_basic_memory_bitmaps(); Thaw: + unlock_device_hotplug(); thaw_processes(); /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; - - Free_bitmaps: - free_basic_memory_bitmaps(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -806,21 +807,20 @@ static int software_resume(void) pm_prepare_console(); error = pm_notifier_call_chain(PM_RESTORE_PREPARE); if (error) - goto close_finish; - - error = create_basic_memory_bitmaps(); - if (error) - goto close_finish; + goto Close_Finish; pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); - if (error) { - swsusp_close(FMODE_READ); - goto Done; - } + if (error) + goto Close_Finish; pr_debug("PM: Loading hibernation image.\n"); + lock_device_hotplug(); + error = create_basic_memory_bitmaps(); + if (error) + goto Thaw; + error = swsusp_read(&flags); swsusp_close(FMODE_READ); if (!error) @@ -828,9 +828,10 @@ static int software_resume(void) printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); swsusp_free(); - thaw_processes(); - Done: free_basic_memory_bitmaps(); + Thaw: + unlock_device_hotplug(); + thaw_processes(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); @@ -840,7 +841,7 @@ static int software_resume(void) mutex_unlock(&pm_mutex); pr_debug("PM: Hibernation image not present or could not be loaded.\n"); return error; -close_finish: + Close_Finish: swsusp_close(FMODE_READ); goto Finish; } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 349587bb03e..358a146fd4d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -352,7 +352,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) struct mem_extent *ext, *cur, *aux; zone_start = zone->zone_start_pfn; - zone_end = zone->zone_start_pfn + zone->spanned_pages; + zone_end = zone_end_pfn(zone); list_for_each_entry(ext, list, hook) if (zone_start <= ext->end) @@ -884,7 +884,7 @@ static unsigned int count_highmem_pages(void) continue; mark_free_pages(zone); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (saveable_highmem_page(zone, pfn)) n++; @@ -948,7 +948,7 @@ static unsigned int count_data_pages(void) continue; mark_free_pages(zone); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (saveable_page(zone, pfn)) n++; @@ -1041,7 +1041,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) unsigned long max_zone_pfn; mark_free_pages(zone); - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (page_is_saveable(zone, pfn)) memory_bm_set_bit(orig_bm, pfn); @@ -1093,7 +1093,7 @@ void swsusp_free(void) unsigned long pfn, max_zone_pfn; 
for_each_populated_zone(zone) { - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); @@ -1755,7 +1755,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) /* Clear page flags */ for_each_populated_zone(zone) { - max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) swsusp_unset_page_free(pfn_to_page(pfn)); diff --git a/kernel/power/user.c b/kernel/power/user.c index 4ed81e74f86..72e8f4fd616 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -60,11 +60,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) error = -ENOSYS; goto Unlock; } - if(create_basic_memory_bitmaps()) { - atomic_inc(&snapshot_device_available); - error = -ENOMEM; - goto Unlock; - } nonseekable_open(inode, filp); data = &snapshot_state; filp->private_data = data; @@ -90,10 +85,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_RESTORE); } - if (error) { - free_basic_memory_bitmaps(); + if (error) atomic_inc(&snapshot_device_available); - } + data->frozen = 0; data->ready = 0; data->platform_support = 0; @@ -111,11 +105,11 @@ static int snapshot_release(struct inode *inode, struct file *filp) lock_system_sleep(); swsusp_free(); - free_basic_memory_bitmaps(); data = filp->private_data; free_all_swap_pages(data->swap); if (data->frozen) { pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); } pm_notifier_call_chain(data->mode == O_RDONLY ? @@ -207,6 +201,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!mutex_trylock(&pm_mutex)) return -EBUSY; + lock_device_hotplug(); data = filp->private_data; switch (cmd) { @@ -220,14 +215,22 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, printk("done.\n"); error = freeze_processes(); - if (!error) + if (error) + break; + + error = create_basic_memory_bitmaps(); + if (error) + thaw_processes(); + else data->frozen = 1; + break; case SNAPSHOT_UNFREEZE: if (!data->frozen || data->ready) break; pm_restore_gfp_mask(); + free_basic_memory_bitmaps(); thaw_processes(); data->frozen = 0; break; @@ -371,6 +374,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } + unlock_device_hotplug(); mutex_unlock(&pm_mutex); return error; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a146ee327f6..dd562e9aa2c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -236,7 +236,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) */ int dumpable = 0; /* Don't let security modules deny introspection */ - if (task == current) + if (same_thread_group(task, current)) return 0; rcu_read_lock(); tcred = __task_cred(task); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 33eb4620aa1..b02a339836b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -122,7 +122,7 @@ struct lockdep_map rcu_sched_lock_map = STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key); EXPORT_SYMBOL_GPL(rcu_sched_lock_map); -int debug_lockdep_rcu_enabled(void) +int notrace debug_lockdep_rcu_enabled(void) { return rcu_scheduler_active && debug_locks && current->lockdep_recursion == 0; diff --git a/kernel/res_counter.c b/kernel/res_counter.c index ff55247e704..4aa8a305aed 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ 
-17,8 +17,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent) { spin_lock_init(&counter->lock); - counter->limit = RESOURCE_MAX; - counter->soft_limit = RESOURCE_MAX; + counter->limit = RES_COUNTER_MAX; + counter->soft_limit = RES_COUNTER_MAX; counter->parent = parent; } @@ -178,23 +178,30 @@ u64 res_counter_read_u64(struct res_counter *counter, int member) #endif int res_counter_memparse_write_strategy(const char *buf, - unsigned long long *res) + unsigned long long *resp) { char *end; + unsigned long long res; - /* return RESOURCE_MAX(unlimited) if "-1" is specified */ + /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */ if (*buf == '-') { - *res = simple_strtoull(buf + 1, &end, 10); - if (*res != 1 || *end != '\0') + res = simple_strtoull(buf + 1, &end, 10); + if (res != 1 || *end != '\0') return -EINVAL; - *res = RESOURCE_MAX; + *resp = RES_COUNTER_MAX; return 0; } - *res = memparse(buf, &end); + res = memparse(buf, &end); if (*end != '\0') return -EINVAL; - *res = PAGE_ALIGN(*res); + if (PAGE_ALIGN(res) >= res) + res = PAGE_ALIGN(res); + else + res = RES_COUNTER_MAX; + + *resp = res; + return 0; } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e076bddd4c6..196559994f7 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -124,7 +124,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, " "); SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ", - p->comm, p->pid, + p->comm, task_pid_nr(p), SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); @@ -289,7 +289,7 @@ do { \ P(nr_load_updates); P(nr_uninterruptible); PN(next_balance); - P(curr->pid); + SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); PN(clock); P(cpu_load[0]); P(cpu_load[1]); @@ -492,7 +492,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) { unsigned long nr_switches; - SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, + SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p), get_nr_threads(p)); SEQ_printf(m, "---------------------------------------------------------" diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7f0a5e6cdae..11cd1366735 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5151,7 +5151,7 @@ static int should_we_balance(struct lb_env *env) * First idle cpu or the first cpu(busiest) in this sched group * is eligible for doing load balancing at this and above domains. */ - return balance_cpu != env->dst_cpu; + return balance_cpu == env->dst_cpu; } /* @@ -5928,11 +5928,15 @@ static void task_fork_fair(struct task_struct *p) cfs_rq = task_cfs_rq(current); curr = cfs_rq->curr; - if (unlikely(task_cpu(p) != this_cpu)) { - rcu_read_lock(); - __set_task_cpu(p, this_cpu); - rcu_read_unlock(); - } + /* + * Not only the cpu but also the task_group of the parent might have + * been changed after parent->se.parent,cfs_rq were copied to + * child->se.parent,cfs_rq. So call __set_task_cpu() to make those + * of child point to valid ones. + */ + rcu_read_lock(); + __set_task_cpu(p, this_cpu); + rcu_read_unlock(); update_curr(cfs_rq); diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 5aef494fc8b..c7edee71bce 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -104,8 +104,9 @@ static inline void sched_info_queued(struct task_struct *t) } /* - * Called when a process ceases being the active-running process, either - * voluntarily or involuntarily. Now we can calculate how long we ran. 
+ * Called when a process ceases being the active-running process involuntarily + * due, typically, to expiring its time slice (this may also be called when + * switching to the idle task). Now we can calculate how long we ran. * Also, if the process is still in the TASK_RUNNING state, call * sched_info_queued() to mark that it has now again started waiting on * the runqueue. diff --git a/kernel/signal.c b/kernel/signal.c index 50e41075ac7..ded28b91fa5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3394,7 +3394,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, new_ka.sa.sa_restorer = compat_ptr(restorer); #endif ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask)); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) return -EFAULT; sigset_from_compat(&new_ka.sa.sa_mask, &mask); @@ -3406,7 +3406,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask)); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); #ifdef __ARCH_HAS_SA_RESTORER ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); diff --git a/kernel/smp.c b/kernel/smp.c index 449b707fc20..0564571dcdf 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -48,10 +48,13 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) cpu_to_node(cpu))) return notifier_from_errno(-ENOMEM); if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, - cpu_to_node(cpu))) + cpu_to_node(cpu))) { + free_cpumask_var(cfd->cpumask); return notifier_from_errno(-ENOMEM); + } cfd->csd = alloc_percpu(struct call_single_data); if (!cfd->csd) { + free_cpumask_var(cfd->cpumask_ipi); free_cpumask_var(cfd->cpumask); return notifier_from_errno(-ENOMEM); } @@ -572,8 +575,10 @@ EXPORT_SYMBOL(on_each_cpu); * * If @wait is true, then returns once @func has returned. * - * You must not call this function with disabled interrupts or - * from a hardware interrupt handler or from a bottom half handler. + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. The + * exception is that it may be used during early boot while + * early_boot_irqs_disabled is set. */ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait) @@ -582,9 +587,10 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, smp_call_function_many(mask, func, info, wait); if (cpumask_test_cpu(cpu, mask)) { - local_irq_disable(); + unsigned long flags; + local_irq_save(flags); func(info); - local_irq_enable(); + local_irq_restore(flags); } put_cpu(); } diff --git a/kernel/softirq.c b/kernel/softirq.c index be3d3514c32..53cc09ceb0b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -876,7 +876,6 @@ int __init __weak early_irq_init(void) return 0; } -#ifdef CONFIG_GENERIC_HARDIRQS int __init __weak arch_probe_nr_irqs(void) { return NR_IRQS_LEGACY; @@ -886,4 +885,3 @@ int __init __weak arch_early_irq_init(void) { return 0; } -#endif diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 5cdd8065a3c..4b082b5cac9 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -34,6 +34,20 @@ #else #define raw_read_can_lock(l) read_can_lock(l) #define raw_write_can_lock(l) write_can_lock(l) + +/* + * Some architectures can relax in favour of the CPU owning the lock. 
+ */ +#ifndef arch_read_relax +# define arch_read_relax(l) cpu_relax() +#endif +#ifndef arch_write_relax +# define arch_write_relax(l) cpu_relax() +#endif +#ifndef arch_spin_relax +# define arch_spin_relax(l) cpu_relax() +#endif + /* * We build the __lock_function inlines here. They are too large for * inlining all over the place, but here is only one user per function diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 07f6fc468e1..b2f06f3c6a3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1225,7 +1225,7 @@ static struct ctl_table vm_table[] = { .data = &hugepages_treat_as_movable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = hugetlb_treat_movable_handler, + .proc_handler = proc_dointvec, }, { .procname = "nr_overcommit_hugepages", @@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = { { .procname = "inode-nr", .data = &inodes_stat, - .maxlen = 2*sizeof(int), + .maxlen = 2*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, - .maxlen = 7*sizeof(int), + .maxlen = 7*sizeof(long), .mode = 0444, .proc_handler = proc_nr_inodes, }, @@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = { { .procname = "dentry-state", .data = &dentry_stat, - .maxlen = 6*sizeof(int), + .maxlen = 6*sizeof(long), .mode = 0444, .proc_handler = proc_nr_dentry, }, diff --git a/kernel/task_work.c b/kernel/task_work.c index 65bd3c92d6f..8727032e3a6 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -4,6 +4,23 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +/** + * task_work_add - ask the @task to execute @work->func() + * @task: the task which should run the callback + * @work: the callback to run + * @notify: send the notification if true + * + * Queue @work for task_work_run() below and notify the @task if @notify. + * Fails if the @task is exiting/exited and thus it can't process this @work. + * Otherwise @work->func() will be called when the @task returns from kernel + * mode or exits. + * + * This is like the signal handler which runs in kernel mode, but it doesn't + * try to wake up the @task. + * + * RETURNS: + * 0 if succeeds or -ESRCH. + */ int task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { @@ -21,11 +38,22 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify) return 0; } +/** + * task_work_cancel - cancel a pending work added by task_work_add() + * @task: the task which should execute the work + * @func: identifies the work to remove + * + * Find the last queued pending work with ->func == @func and remove + * it from queue. + * + * RETURNS: + * The found work or NULL if not found. + */ struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { struct callback_head **pprev = &task->task_works; - struct callback_head *work = NULL; + struct callback_head *work; unsigned long flags; /* * If cmpxchg() fails we continue without updating pprev. @@ -35,7 +63,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) */ raw_spin_lock_irqsave(&task->pi_lock, flags); while ((work = ACCESS_ONCE(*pprev))) { - read_barrier_depends(); + smp_read_barrier_depends(); if (work->func != func) pprev = &work->next; else if (cmpxchg(pprev, work, work->next) == work) @@ -46,6 +74,14 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) return work; } +/** + * task_work_run - execute the works added by task_work_add() + * + * Flush the pending works. 
Should be used by the core kernel code. + * Called before the task returns to the user-mode or stops, or when + * it exits. In the latter case task_work_add() can no longer add the + * new work after task_work_run() returns. + */ void task_work_run(void) { struct task_struct *task = current; diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8f5b3b98577..bb2215174f0 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work) schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); } -static void notify_cmos_timer(void) +void ntp_notify_cmos_timer(void) { schedule_delayed_work(&sync_cmos_work, 0); } #else -static inline void notify_cmos_timer(void) { } +void ntp_notify_cmos_timer(void) { } #endif @@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai) if (!(time_status & STA_NANO)) txc->time.tv_usec /= NSEC_PER_USEC; - notify_cmos_timer(); - return result; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 48b9fffabdc..947ba25a95a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc) write_seqcount_end(&timekeeper_seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); + ntp_notify_cmos_timer(); + return ret; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a6d098c6df3..03cf44ac54d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1978,12 +1978,27 @@ int __weak ftrace_arch_code_modify_post_process(void) void ftrace_modify_all_code(int command) { + int update = command & FTRACE_UPDATE_TRACE_FUNC; + + /* + * If the ftrace_caller calls a ftrace_ops func directly, + * we need to make sure that it only traces functions it + * expects to trace. When doing the switch of functions, + * we need to update to the ftrace_ops_list_func first + * before the transition between old and new calls are set, + * as the ftrace_ops_list_func will check the ops hashes + * to make sure the ops are having the right functions + * traced. 
+ */ + if (update) + ftrace_update_ftrace_func(ftrace_ops_list_func); + if (command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); else if (command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); - if (command & FTRACE_UPDATE_TRACE_FUNC) + if (update && ftrace_trace_function != ftrace_ops_list_func) ftrace_update_ftrace_func(ftrace_trace_function); if (command & FTRACE_START_FUNC_RET) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 496f94d5769..7974ba20557 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3166,11 +3166,6 @@ static const struct file_operations show_traces_fops = { }; /* - * Only trace on a CPU if the bitmask is set: - */ -static cpumask_var_t tracing_cpumask; - -/* * The tracer itself will not take this lock, but still we want * to provide a consistent cpumask to user-space: */ @@ -3186,11 +3181,12 @@ static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { + struct trace_array *tr = file_inode(filp)->i_private; int len; mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -3208,7 +3204,7 @@ static ssize_t tracing_cpumask_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *ppos) { - struct trace_array *tr = filp->private_data; + struct trace_array *tr = file_inode(filp)->i_private; cpumask_var_t tracing_cpumask_new; int err, cpu; @@ -3228,12 +3224,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, * Increase/decrease the disabled counter if we are * about to flip a bit in the cpumask: */ - if (cpumask_test_cpu(cpu, tracing_cpumask) && + if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu); } - if (!cpumask_test_cpu(cpu, tracing_cpumask) && + if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled); ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu); @@ -3242,7 +3238,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, arch_spin_unlock(&ftrace_max_lock); local_irq_enable(); - cpumask_copy(tracing_cpumask, tracing_cpumask_new); + cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); mutex_unlock(&tracing_cpumask_update_lock); free_cpumask_var(tracing_cpumask_new); @@ -3256,9 +3252,10 @@ err_unlock: } static const struct file_operations tracing_cpumask_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_cpumask_read, .write = tracing_cpumask_write, + .release = tracing_release_generic_tr, .llseek = generic_file_llseek, }; @@ -5938,6 +5935,11 @@ static int new_instance_create(const char *name) if (!tr->name) goto out_free_tr; + if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) + goto out_free_tr; + + cpumask_copy(tr->tracing_cpumask, cpu_all_mask); + raw_spin_lock_init(&tr->start_lock); tr->current_trace = &nop_trace; @@ -5969,6 +5971,7 @@ static int new_instance_create(const char *name) out_free_tr: if (tr->trace_buffer.buffer) ring_buffer_free(tr->trace_buffer.buffer); + free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); @@ -6098,6 +6101,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) { int cpu; + 
trace_create_file("tracing_cpumask", 0644, d_tracer, + tr, &tracing_cpumask_fops); + trace_create_file("trace_options", 0644, d_tracer, tr, &tracing_iter_fops); @@ -6147,9 +6153,6 @@ static __init int tracer_init_debugfs(void) init_tracer_debugfs(&global_trace, d_tracer); - trace_create_file("tracing_cpumask", 0644, d_tracer, - &global_trace, &tracing_cpumask_fops); - trace_create_file("available_tracers", 0444, d_tracer, &global_trace, &show_traces_fops); @@ -6371,7 +6374,7 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) goto out; - if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) + if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; /* Only allocate trace_printk buffers if a trace_printk exists */ @@ -6386,7 +6389,7 @@ __init static int tracer_alloc_buffers(void) ring_buf_size = 1; cpumask_copy(tracing_buffer_mask, cpu_possible_mask); - cpumask_copy(tracing_cpumask, cpu_all_mask); + cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); raw_spin_lock_init(&global_trace.start_lock); @@ -6441,7 +6444,7 @@ out_free_cpumask: #ifdef CONFIG_TRACER_MAX_TRACE free_percpu(global_trace.max_buffer.data); #endif - free_cpumask_var(tracing_cpumask); + free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); out: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fe39acd4c1a..10c86fb7a2b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -206,6 +206,7 @@ struct trace_array { struct dentry *event_dir; struct list_head systems; struct list_head events; + cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ int ref; }; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 29a7ebcfb42..368a4d50cc3 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1489,12 +1489,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } static int -event_create_dir(struct dentry *parent, - struct ftrace_event_file *file, - const struct file_operations *id, - const struct file_operations *enable, - const struct file_operations *filter, - const struct file_operations *format) +event_create_dir(struct dentry *parent, struct ftrace_event_file *file) { struct ftrace_event_call *call = file->event_call; struct trace_array *tr = file->tr; @@ -1522,12 +1517,13 @@ event_create_dir(struct dentry *parent, if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) trace_create_file("enable", 0644, file->dir, file, - enable); + &ftrace_enable_fops); #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) trace_create_file("id", 0444, file->dir, - (void *)(long)call->event.type, id); + (void *)(long)call->event.type, + &ftrace_event_id_fops); #endif /* @@ -1544,10 +1540,10 @@ event_create_dir(struct dentry *parent, } } trace_create_file("filter", 0644, file->dir, call, - filter); + &ftrace_event_filter_fops); trace_create_file("format", 0444, file->dir, call, - format); + &ftrace_event_format_fops); return 0; } @@ -1648,12 +1644,7 @@ trace_create_new_event(struct ftrace_event_call *call, /* Add an event to a trace directory */ static int -__trace_add_new_event(struct ftrace_event_call *call, - struct trace_array *tr, - const struct file_operations *id, - const struct file_operations *enable, - const struct file_operations *filter, - const struct file_operations *format) +__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr) { struct 
ftrace_event_file *file; @@ -1661,7 +1652,7 @@ __trace_add_new_event(struct ftrace_event_call *call, if (!file) return -ENOMEM; - return event_create_dir(tr->event_dir, file, id, enable, filter, format); + return event_create_dir(tr->event_dir, file); } /* @@ -1683,8 +1674,7 @@ __trace_early_add_new_event(struct ftrace_event_call *call, } struct ftrace_module_file_ops; -static void __add_event_to_tracers(struct ftrace_event_call *call, - struct ftrace_module_file_ops *file_ops); +static void __add_event_to_tracers(struct ftrace_event_call *call); /* Add an additional event_call dynamically */ int trace_add_event_call(struct ftrace_event_call *call) @@ -1695,7 +1685,7 @@ int trace_add_event_call(struct ftrace_event_call *call) ret = __register_event(call, NULL); if (ret >= 0) - __add_event_to_tracers(call, NULL); + __add_event_to_tracers(call); mutex_unlock(&event_mutex); mutex_unlock(&trace_types_lock); @@ -1769,100 +1759,21 @@ int trace_remove_event_call(struct ftrace_event_call *call) #ifdef CONFIG_MODULES -static LIST_HEAD(ftrace_module_file_list); - -/* - * Modules must own their file_operations to keep up with - * reference counting. - */ -struct ftrace_module_file_ops { - struct list_head list; - struct module *mod; - struct file_operations id; - struct file_operations enable; - struct file_operations format; - struct file_operations filter; -}; - -static struct ftrace_module_file_ops * -find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod) -{ - /* - * As event_calls are added in groups by module, - * when we find one file_ops, we don't need to search for - * each call in that module, as the rest should be the - * same. Only search for a new one if the last one did - * not match. - */ - if (file_ops && mod == file_ops->mod) - return file_ops; - - list_for_each_entry(file_ops, &ftrace_module_file_list, list) { - if (file_ops->mod == mod) - return file_ops; - } - return NULL; -} - -static struct ftrace_module_file_ops * -trace_create_file_ops(struct module *mod) -{ - struct ftrace_module_file_ops *file_ops; - - /* - * This is a bit of a PITA. To allow for correct reference - * counting, modules must "own" their file_operations. - * To do this, we allocate the file operations that will be - * used in the event directory. 
- */ - - file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL); - if (!file_ops) - return NULL; - - file_ops->mod = mod; - - file_ops->id = ftrace_event_id_fops; - file_ops->id.owner = mod; - - file_ops->enable = ftrace_enable_fops; - file_ops->enable.owner = mod; - - file_ops->filter = ftrace_event_filter_fops; - file_ops->filter.owner = mod; - - file_ops->format = ftrace_event_format_fops; - file_ops->format.owner = mod; - - list_add(&file_ops->list, &ftrace_module_file_list); - - return file_ops; -} - static void trace_module_add_events(struct module *mod) { - struct ftrace_module_file_ops *file_ops = NULL; struct ftrace_event_call **call, **start, **end; start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; - if (start == end) - return; - - file_ops = trace_create_file_ops(mod); - if (!file_ops) - return; - for_each_event(call, start, end) { __register_event(*call, mod); - __add_event_to_tracers(*call, file_ops); + __add_event_to_tracers(*call); } } static void trace_module_remove_events(struct module *mod) { - struct ftrace_module_file_ops *file_ops; struct ftrace_event_call *call, *p; bool clear_trace = false; @@ -1874,16 +1785,6 @@ static void trace_module_remove_events(struct module *mod) __trace_remove_event_call(call); } } - - /* Now free the file_operations */ - list_for_each_entry(file_ops, &ftrace_module_file_list, list) { - if (file_ops->mod == mod) - break; - } - if (&file_ops->list != &ftrace_module_file_list) { - list_del(&file_ops->list); - kfree(file_ops); - } up_write(&trace_event_sem); /* @@ -1919,67 +1820,21 @@ static int trace_module_notify(struct notifier_block *self, return 0; } -static int -__trace_add_new_mod_event(struct ftrace_event_call *call, - struct trace_array *tr, - struct ftrace_module_file_ops *file_ops) -{ - return __trace_add_new_event(call, tr, - &file_ops->id, &file_ops->enable, - &file_ops->filter, &file_ops->format); -} - -#else -static inline struct ftrace_module_file_ops * -find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod) -{ - return NULL; -} -static inline int trace_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} -static inline int -__trace_add_new_mod_event(struct ftrace_event_call *call, - struct trace_array *tr, - struct ftrace_module_file_ops *file_ops) -{ - return -ENODEV; -} +static struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 0, +}; #endif /* CONFIG_MODULES */ /* Create a new event directory structure for a trace directory. */ static void __trace_add_event_dirs(struct trace_array *tr) { - struct ftrace_module_file_ops *file_ops = NULL; struct ftrace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { - if (call->mod) { - /* - * Directories for events by modules need to - * keep module ref counts when opened (as we don't - * want the module to disappear when reading one - * of these files). The file_ops keep account of - * the module ref count. - */ - file_ops = find_ftrace_file_ops(file_ops, call->mod); - if (!file_ops) - continue; /* Warn? 
*/ - ret = __trace_add_new_mod_event(call, tr, file_ops); - if (ret < 0) - pr_warning("Could not create directory for event %s\n", - call->name); - continue; - } - ret = __trace_add_new_event(call, tr, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warning("Could not create directory for event %s\n", call->name); @@ -2287,11 +2142,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) list_for_each_entry(file, &tr->events, list) { - ret = event_create_dir(tr->event_dir, file, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warning("Could not create directory for event %s\n", file->event_call->name); @@ -2332,29 +2183,14 @@ __trace_remove_event_dirs(struct trace_array *tr) remove_event_file_dir(file); } -static void -__add_event_to_tracers(struct ftrace_event_call *call, - struct ftrace_module_file_ops *file_ops) +static void __add_event_to_tracers(struct ftrace_event_call *call) { struct trace_array *tr; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (file_ops) - __trace_add_new_mod_event(call, tr, file_ops); - else - __trace_add_new_event(call, tr, - &ftrace_event_id_fops, - &ftrace_enable_fops, - &ftrace_event_filter_fops, - &ftrace_event_format_fops); - } + list_for_each_entry(tr, &ftrace_trace_arrays, list) + __trace_add_new_event(call, tr); } -static struct notifier_block trace_module_nb = { - .notifier_call = trace_module_notify, - .priority = 0, -}; - extern struct ftrace_event_call *__start_ftrace_events[]; extern struct ftrace_event_call *__stop_ftrace_events[]; @@ -2559,10 +2395,11 @@ static __init int event_trace_init(void) if (ret) return ret; +#ifdef CONFIG_MODULES ret = register_module_notifier(&trace_module_nb); if (ret) pr_warning("Failed to register trace events module notifier\n"); - +#endif return 0; } early_initcall(event_trace_memsetup); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8fd03657bc7..559329d9bd2 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -200,8 +200,8 @@ extern char *__bad_type_size(void); #type, #name, offsetof(typeof(trace), name), \ sizeof(trace.name), is_signed_type(type) -static -int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) +static int __init +__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) { int i; int pos = 0; @@ -228,7 +228,7 @@ int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) return pos; } -static int set_syscall_print_fmt(struct ftrace_event_call *call) +static int __init set_syscall_print_fmt(struct ftrace_event_call *call) { char *print_fmt; int len; @@ -253,7 +253,7 @@ static int set_syscall_print_fmt(struct ftrace_event_call *call) return 0; } -static void free_syscall_print_fmt(struct ftrace_event_call *call) +static void __init free_syscall_print_fmt(struct ftrace_event_call *call) { struct syscall_metadata *entry = call->data; @@ -459,7 +459,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file, mutex_unlock(&syscall_trace_lock); } -static int init_syscall_trace(struct ftrace_event_call *call) +static int __init init_syscall_trace(struct ftrace_event_call *call) { int id; int num; diff --git a/kernel/up.c b/kernel/up.c index c54c75e9faf..630d72bf7e4 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -10,12 +10,64 @@ 
int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int wait) { + unsigned long flags; + WARN_ON(cpu != 0); - local_irq_disable(); - (func)(info); - local_irq_enable(); + local_irq_save(flags); + func(info); + local_irq_restore(flags); return 0; } EXPORT_SYMBOL(smp_call_function_single); + +int on_each_cpu(smp_call_func_t func, void *info, int wait) +{ + unsigned long flags; + + local_irq_save(flags); + func(info); + local_irq_restore(flags); + return 0; +} +EXPORT_SYMBOL(on_each_cpu); + +/* + * Note we still need to test the mask even for UP + * because we actually can get an empty mask from + * code that on SMP might call us without the local + * CPU in the mask. + */ +void on_each_cpu_mask(const struct cpumask *mask, + smp_call_func_t func, void *info, bool wait) +{ + unsigned long flags; + + if (cpumask_test_cpu(0, mask)) { + local_irq_save(flags); + func(info); + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(on_each_cpu_mask); + +/* + * Preemption is disabled here to make sure the cond_func is called under the + * same condtions in UP and SMP. + */ +void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), + smp_call_func_t func, void *info, bool wait, + gfp_t gfp_flags) +{ + unsigned long flags; + + preempt_disable(); + if (cond_func(0, info)) { + local_irq_save(flags); + func(info); + local_irq_restore(flags); + } + preempt_enable(); +} +EXPORT_SYMBOL(on_each_cpu_cond); |
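The cgroup_write_event_control() hunk above replaces bare struct file handling (eventfd_fget()/fget()/fput()) with the lighter struct fd helpers fdget()/fdput(). Below is a minimal sketch of that pattern, assuming a kernel build context; example_check_readable() is a made-up name for illustration only and is not part of the patch.

#include <linux/file.h>		/* struct fd, fdget(), fdput() */
#include <linux/fs.h>		/* file_inode(), inode_permission(), MAY_READ */

/* example_check_readable() is a hypothetical helper, not taken from the diff. */
static int example_check_readable(unsigned int ufd)
{
	struct fd f = fdget(ufd);
	int ret;

	if (!f.file)			/* fdget() reports failure via a NULL file */
		return -EBADF;

	ret = inode_permission(file_inode(f.file), MAY_READ);

	fdput(f);			/* drops the reference only if fdget() took one */
	return ret;
}

Note how the error path becomes a plain -EBADF check instead of IS_ERR()/PTR_ERR() handling, which is what the cgroup hunk does for both the eventfd and the control-file descriptors.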
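Several hunks (kernel/gcov/fs.c, kernel/ksysfs.c, kernel/params.c) swap the deprecated strict_strtoul()/strict_strtol() calls for the kstrto*() family. A minimal sketch of the checked-conversion idiom follows, again assuming a kernel build context; example_parse_count() is a hypothetical name.

#include <linux/kernel.h>	/* kstrtoul() */

/* example_parse_count() illustrates the conversion pattern, nothing more. */
static int example_parse_count(const char *buf, unsigned long *out)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);	/* 0 on success, -EINVAL or -ERANGE on error */
	if (ret)
		return ret;

	*out = val;
	return 0;
}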
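The kernel/smp.c and kernel/up.c hunks also switch from local_irq_disable()/local_irq_enable() to local_irq_save()/local_irq_restore(), so the helpers remain safe when the caller already runs with interrupts disabled (for example during early boot, per the updated on_each_cpu_mask() comment). A minimal sketch of that idiom, with a hypothetical wrapper name:

#include <linux/irqflags.h>	/* local_irq_save(), local_irq_restore() */

/* example_run_with_irqs_off() is illustrative only. */
static void example_run_with_irqs_off(void (*func)(void *), void *info)
{
	unsigned long flags;

	local_irq_save(flags);		/* records whether IRQs were already off */
	func(info);
	local_irq_restore(flags);	/* restores the prior state, never force-enables */
}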