diff options
Diffstat (limited to 'arch')
247 files changed, 3100 insertions, 2376 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 4877a8c8ee1..53d7f619a1b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -32,8 +32,9 @@ config HAVE_OPROFILE config KPROBES bool "Kprobes" - depends on KALLSYMS && MODULES + depends on MODULES depends on HAVE_KPROBES + select KALLSYMS help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes @@ -45,7 +46,6 @@ config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES depends on !PREEMPT - select KALLSYMS_ALL config HAVE_EFFICIENT_UNALIGNED_ACCESS bool @@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_ARCH_JUMP_LABEL + bool + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b9647bb66d1..d04ccd73af4 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -9,6 +9,7 @@ config ALPHA select HAVE_IDE select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS help diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h index 01d71e1c8a9..012f1243b1c 100644 --- a/arch/alpha/include/asm/cacheflush.h +++ b/arch/alpha/include/asm/cacheflush.h @@ -43,6 +43,8 @@ extern void smp_imb(void); /* ??? Ought to use this in arch/alpha/kernel/signal.c too. */ #ifndef CONFIG_SMP +#include <linux/sched.h> + extern void __load_new_mm_context(struct mm_struct *); static inline void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h index 4157cd3c44a..fe792ca818f 100644 --- a/arch/alpha/include/asm/perf_event.h +++ b/arch/alpha/include/asm/perf_event.h @@ -1,11 +1,6 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H -/* Alpha only supports software events through this interface. */ -extern void set_perf_event_pending(void); - -#define PERF_EVENT_INDEX_OFFSET 0 - #ifdef CONFIG_PERF_EVENTS extern void init_hw_perf_events(void); #else diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 804e5311c84..058937bf5a7 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -449,10 +449,13 @@ #define __NR_pwritev 491 #define __NR_rt_tgsigqueueinfo 492 #define __NR_perf_event_open 493 +#define __NR_fanotify_init 494 +#define __NR_fanotify_mark 495 +#define __NR_prlimit64 496 #ifdef __KERNEL__ -#define NR_SYSCALLS 494 +#define NR_SYSCALLS 497 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR @@ -463,6 +466,7 @@ #define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_RT_SIGSUSPEND /* "Conditional" syscalls. What we want is diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index b45d913a51c..6d159cee5f2 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -73,8 +73,6 @@ ldq $20, HAE_REG($19); \ stq $21, HAE_CACHE($19); \ stq $21, 0($20); \ - ldq $0, 0($sp); \ - ldq $1, 8($sp); \ 99:; \ ldq $19, 72($sp); \ ldq $20, 80($sp); \ @@ -316,19 +314,24 @@ ret_from_sys_call: cmovne $26, 0, $19 /* $19 = 0 => non-restartable */ ldq $0, SP_OFF($sp) and $0, 8, $0 - beq $0, restore_all -ret_from_reschedule: + beq $0, ret_to_kernel +ret_to_user: /* Make sure need_resched and sigpending don't change between sampling and the rti. */ lda $16, 7 call_pal PAL_swpipl ldl $5, TI_FLAGS($8) and $5, _TIF_WORK_MASK, $2 - bne $5, work_pending + bne $2, work_pending restore_all: RESTORE_ALL call_pal PAL_rti +ret_to_kernel: + lda $16, 7 + call_pal PAL_swpipl + br restore_all + .align 3 $syscall_error: /* @@ -363,7 +366,7 @@ $ret_success: * $8: current. * $19: The old syscall number, or zero if this is not a return * from a syscall that errored and is possibly restartable. - * $20: Error indication. + * $20: The old a3 value */ .align 4 @@ -392,12 +395,18 @@ $work_resched: $work_notifysig: mov $sp, $16 - br $1, do_switch_stack + bsr $1, do_switch_stack mov $sp, $17 mov $5, $18 + mov $19, $9 /* save old syscall number */ + mov $20, $10 /* save old a3 */ + and $5, _TIF_SIGPENDING, $2 + cmovne $2, 0, $9 /* we don't want double syscall restarts */ jsr $26, do_notify_resume + mov $9, $19 + mov $10, $20 bsr $1, undo_switch_stack - br restore_all + br ret_to_user .end work_pending /* @@ -430,6 +439,7 @@ strace: beq $1, 1f ldq $27, 0($2) 1: jsr $26, ($27), sys_gettimeofday +ret_from_straced: ldgp $gp, 0($26) /* check return.. */ @@ -650,7 +660,7 @@ kernel_thread: /* We don't actually care for a3 success widgetry in the kernel. Not for positive errno values. */ stq $0, 0($sp) /* $0 */ - br restore_all + br ret_to_kernel .end kernel_thread /* @@ -757,11 +767,15 @@ sys_vfork: .ent sys_sigreturn sys_sigreturn: .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 mov $sp, $17 lda $18, -SWITCH_STACK_SIZE($sp) lda $sp, -SWITCH_STACK_SIZE($sp) jsr $26, do_sigreturn - br $1, undo_switch_stack + bne $9, 1f + jsr $26, syscall_trace +1: br $1, undo_switch_stack br ret_from_sys_call .end sys_sigreturn @@ -770,47 +784,19 @@ sys_sigreturn: .ent sys_rt_sigreturn sys_rt_sigreturn: .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 mov $sp, $17 lda $18, -SWITCH_STACK_SIZE($sp) lda $sp, -SWITCH_STACK_SIZE($sp) jsr $26, do_rt_sigreturn - br $1, undo_switch_stack + bne $9, 1f + jsr $26, syscall_trace +1: br $1, undo_switch_stack br ret_from_sys_call .end sys_rt_sigreturn .align 4 - .globl sys_sigsuspend - .ent sys_sigsuspend -sys_sigsuspend: - .prologue 0 - mov $sp, $17 - br $1, do_switch_stack - mov $sp, $18 - subq $sp, 16, $sp - stq $26, 0($sp) - jsr $26, do_sigsuspend - ldq $26, 0($sp) - lda $sp, SWITCH_STACK_SIZE+16($sp) - ret -.end sys_sigsuspend - - .align 4 - .globl sys_rt_sigsuspend - .ent sys_rt_sigsuspend -sys_rt_sigsuspend: - .prologue 0 - mov $sp, $18 - br $1, do_switch_stack - mov $sp, $19 - subq $sp, 16, $sp - stq $26, 0($sp) - jsr $26, do_rt_sigsuspend - ldq $26, 0($sp) - lda $sp, SWITCH_STACK_SIZE+16($sp) - ret -.end sys_rt_sigsuspend - - .align 4 .globl sys_sethae .ent sys_sethae sys_sethae: @@ -929,15 +915,6 @@ sys_execve: .end sys_execve .align 4 - .globl osf_sigprocmask - .ent osf_sigprocmask -osf_sigprocmask: - .prologue 0 - mov $sp, $18 - jmp $31, sys_osf_sigprocmask -.end osf_sigprocmask - - .align 4 .globl alpha_ni_syscall .ent alpha_ni_syscall alpha_ni_syscall: diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 8ca6345bf13..253cf1a8748 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -90,11 +90,13 @@ static int ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn, u64 c_stat, u64 c_sts, int print) { - char *sourcename[] = { "UNKNOWN", "UNKNOWN", "UNKNOWN", - "MEMORY", "BCACHE", "DCACHE", - "BCACHE PROBE", "BCACHE PROBE" }; - char *streamname[] = { "D", "I" }; - char *bitsname[] = { "SINGLE", "DOUBLE" }; + static const char * const sourcename[] = { + "UNKNOWN", "UNKNOWN", "UNKNOWN", + "MEMORY", "BCACHE", "DCACHE", + "BCACHE PROBE", "BCACHE PROBE" + }; + static const char * const streamname[] = { "D", "I" }; + static const char * const bitsname[] = { "SINGLE", "DOUBLE" }; int status = MCHK_DISPOSITION_REPORT; int source = -1, stream = -1, bits = -1; diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c index 5c905aaaecc..648ae88aeb8 100644 --- a/arch/alpha/kernel/err_marvel.c +++ b/arch/alpha/kernel/err_marvel.c @@ -589,22 +589,23 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt) static void marvel_print_pox_trans_sum(u64 trans_sum) { - char *pcix_cmd[] = { "Interrupt Acknowledge", - "Special Cycle", - "I/O Read", - "I/O Write", - "Reserved", - "Reserved / Device ID Message", - "Memory Read", - "Memory Write", - "Reserved / Alias to Memory Read Block", - "Reserved / Alias to Memory Write Block", - "Configuration Read", - "Configuration Write", - "Memory Read Multiple / Split Completion", - "Dual Address Cycle", - "Memory Read Line / Memory Read Block", - "Memory Write and Invalidate / Memory Write Block" + static const char * const pcix_cmd[] = { + "Interrupt Acknowledge", + "Special Cycle", + "I/O Read", + "I/O Write", + "Reserved", + "Reserved / Device ID Message", + "Memory Read", + "Memory Write", + "Reserved / Alias to Memory Read Block", + "Reserved / Alias to Memory Write Block", + "Configuration Read", + "Configuration Write", + "Memory Read Multiple / Split Completion", + "Dual Address Cycle", + "Memory Read Line / Memory Read Block", + "Memory Write and Invalidate / Memory Write Block" }; #define IO7__POX_TRANSUM__PCI_ADDR__S (0) diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c index f7ed97ce0df..c3b3781a03d 100644 --- a/arch/alpha/kernel/err_titan.c +++ b/arch/alpha/kernel/err_titan.c @@ -75,8 +75,12 @@ titan_parse_p_serror(int which, u64 serror, int print) int status = MCHK_DISPOSITION_REPORT; #ifdef CONFIG_VERBOSE_MCHECK - char *serror_src[] = {"GPCI", "APCI", "AGP HP", "AGP LP"}; - char *serror_cmd[] = {"DMA Read", "DMA RMW", "SGTE Read", "Reserved"}; + static const char * const serror_src[] = { + "GPCI", "APCI", "AGP HP", "AGP LP" + }; + static const char * const serror_cmd[] = { + "DMA Read", "DMA RMW", "SGTE Read", "Reserved" + }; #endif /* CONFIG_VERBOSE_MCHECK */ #define TITAN__PCHIP_SERROR__LOST_UECC (1UL << 0) @@ -140,14 +144,15 @@ titan_parse_p_perror(int which, int port, u64 perror, int print) int status = MCHK_DISPOSITION_REPORT; #ifdef CONFIG_VERBOSE_MCHECK - char *perror_cmd[] = { "Interrupt Acknowledge", "Special Cycle", - "I/O Read", "I/O Write", - "Reserved", "Reserved", - "Memory Read", "Memory Write", - "Reserved", "Reserved", - "Configuration Read", "Configuration Write", - "Memory Read Multiple", "Dual Address Cycle", - "Memory Read Line","Memory Write and Invalidate" + static const char * const perror_cmd[] = { + "Interrupt Acknowledge", "Special Cycle", + "I/O Read", "I/O Write", + "Reserved", "Reserved", + "Memory Read", "Memory Write", + "Reserved", "Reserved", + "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address Cycle", + "Memory Read Line", "Memory Write and Invalidate" }; #endif /* CONFIG_VERBOSE_MCHECK */ @@ -273,11 +278,11 @@ titan_parse_p_agperror(int which, u64 agperror, int print) int cmd, len; unsigned long addr; - char *agperror_cmd[] = { "Read (low-priority)", "Read (high-priority)", - "Write (low-priority)", - "Write (high-priority)", - "Reserved", "Reserved", - "Flush", "Fence" + static const char * const agperror_cmd[] = { + "Read (low-priority)", "Read (high-priority)", + "Write (low-priority)", "Write (high-priority)", + "Reserved", "Reserved", + "Flush", "Fence" }; #endif /* CONFIG_VERBOSE_MCHECK */ diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 5d1e6d6ce68..547e8b84b2f 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -15,7 +15,6 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/syscalls.h> #include <linux/unistd.h> @@ -69,7 +68,6 @@ SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, { struct mm_struct *mm; - lock_kernel(); mm = current->mm; mm->end_code = bss_start + bss_len; mm->start_brk = bss_start + bss_len; @@ -78,7 +76,6 @@ SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, printk("set_program_attributes(%lx %lx %lx %lx)\n", text_start, text_len, bss_start, bss_len); #endif - unlock_kernel(); return 0; } @@ -517,7 +514,6 @@ SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, long error; int __user *min_buf_size_ptr; - lock_kernel(); switch (code) { case PL_SET: if (get_user(error, &args->set.nbytes)) @@ -547,7 +543,6 @@ SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, error = -EOPNOTSUPP; break; }; - unlock_kernel(); return error; } @@ -594,7 +589,7 @@ SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss, SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) { - char *sysinfo_table[] = { + const char *sysinfo_table[] = { utsname()->sysname, utsname()->nodename, utsname()->release, @@ -606,7 +601,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) "dummy", /* secure RPC domain */ }; unsigned long offset; - char *res; + const char *res; long len, err = -EINVAL; offset = command-1; diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 738fc824e2e..b899e95f79f 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -66,7 +66,7 @@ static int pci_mmap_resource(struct kobject *kobj, { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); - struct resource *res = (struct resource *)attr->private; + struct resource *res = attr->private; enum pci_mmap_state mmap_type; struct pci_bus_region bar; int i; diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 85d8e4f58c8..1cc49683fb6 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -307,7 +307,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; /* It is possible on very rare occasions that the PMC has overflowed * but the interrupt is yet to come. Detect and fix this situation. @@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) struct hw_perf_event *hwc = &pe->hw; int idx = hwc->idx; - if (cpuc->current_idx[j] != PMC_NO_INDEX) { - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); - continue; + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; } - alpha_perf_event_set_period(pe, hwc, idx); - cpuc->current_idx[j] = idx; - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<<cpuc->current_idx[j]); } cpuc->config = cpuc->event[0]->hw.config_base; } @@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static int alpha_pmu_enable(struct perf_event *event) +static int alpha_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; int n0; int ret; - unsigned long flags; + unsigned long irq_flags; /* * The Sparc code has the IRQ disable first followed by the perf @@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event) * nevertheless we disable the PMCs first to enable a potential * final PMI to occur before we disable interrupts. */ - perf_disable(); - local_irq_save(flags); + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); /* Default to error to be returned */ ret = -EAGAIN; @@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event) } } - local_irq_restore(flags); - perf_enable(); + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); return ret; } @@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static void alpha_pmu_disable(struct perf_event *event) +static void alpha_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - unsigned long flags; + unsigned long irq_flags; int j; - perf_disable(); - local_irq_save(flags); + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); for (j = 0; j < cpuc->n_events; j++) { if (event == cpuc->event[j]) { @@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event) } } - local_irq_restore(flags); - perf_enable(); + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); } @@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event) } -static void alpha_pmu_unthrottle(struct perf_event *event) +static void alpha_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<<hwc->idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + cpuc->idx_mask |= 1UL<<hwc->idx; - wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); } @@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static const struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - - /* * Main entry point - enable HW performance counters. */ -void hw_perf_enable(void) +static void alpha_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -700,7 +732,7 @@ void hw_perf_enable(void) * Main entry point - disable HW performance counters. */ -void hw_perf_disable(void) +static void alpha_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -713,6 +745,17 @@ void hw_perf_disable(void) wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); } +static struct pmu pmu = { + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, + .event_init = alpha_pmu_event_init, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, + .read = alpha_pmu_read, +}; + /* * Main entry point - don't know when this is called but it @@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); /* la_ptr is the counter that overflowed. */ - if (unlikely(la_ptr >= perf_max_events)) { + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! */ irq_err_count++; pr_warning("PMI: silly index %ld\n", la_ptr); @@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* Interrupts coming too quickly; "throttle" the * counter, i.e., disable it for a little while. */ - cpuc->idx_mask &= ~(1UL<<idx); + alpha_pmu_stop(event, 0); } } wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); @@ -837,6 +880,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 842dba308ea..3ec35066f1d 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -356,7 +356,7 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) dest[27] = pt->r27; dest[28] = pt->r28; dest[29] = pt->gp; - dest[30] = rdusp(); + dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; dest[31] = pt->pc; /* Once upon a time this was the PS value. Which is stupid diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 0932dbb1ef8..6f7feb5db27 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -41,46 +41,20 @@ static void do_signal(struct pt_regs *, struct switch_stack *, /* * The OSF/1 sigprocmask calling sequence is different from the * C sigprocmask() sequence.. - * - * how: - * 1 - SIG_BLOCK - * 2 - SIG_UNBLOCK - * 3 - SIG_SETMASK - * - * We change the range to -1 .. 1 in order to let gcc easily - * use the conditional move instructions. - * - * Note that we don't need to acquire the kernel lock for SMP - * operation, as all of this is local to this thread. */ -SYSCALL_DEFINE3(osf_sigprocmask, int, how, unsigned long, newmask, - struct pt_regs *, regs) +SYSCALL_DEFINE2(osf_sigprocmask, int, how, unsigned long, newmask) { - unsigned long oldmask = -EINVAL; - - if ((unsigned long)how-1 <= 2) { - long sign = how-2; /* -1 .. 1 */ - unsigned long block, unblock; - - newmask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - oldmask = current->blocked.sig[0]; - - unblock = oldmask & ~newmask; - block = oldmask | newmask; - if (!sign) - block = unblock; - if (sign <= 0) - newmask = block; - if (_NSIG_WORDS > 1 && sign > 0) - sigemptyset(¤t->blocked); - current->blocked.sig[0] = newmask; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - regs->r0 = 0; /* special no error return */ + sigset_t oldmask; + sigset_t mask; + unsigned long res; + + siginitset(&mask, newmask & _BLOCKABLE); + res = sigprocmask(how, &mask, &oldmask); + if (!res) { + force_successful_syscall_return(); + res = oldmask.sig[0]; } - return oldmask; + return res; } SYSCALL_DEFINE3(osf_sigaction, int, sig, @@ -94,9 +68,9 @@ SYSCALL_DEFINE3(osf_sigaction, int, sig, old_sigset_t mask; if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags)) + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); new_ka.ka_restorer = NULL; } @@ -106,9 +80,9 @@ SYSCALL_DEFINE3(osf_sigaction, int, sig, if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags)) + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; @@ -144,8 +118,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, /* * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage int -do_sigsuspend(old_sigset_t mask, struct pt_regs *regs, struct switch_stack *sw) +SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) { mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); @@ -154,41 +127,6 @@ do_sigsuspend(old_sigset_t mask, struct pt_regs *regs, struct switch_stack *sw) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - /* Indicate EINTR on return from any possible signal handler, - which will not come back through here, but via sigreturn. */ - regs->r0 = EINTR; - regs->r19 = 1; - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - return -ERESTARTNOHAND; -} - -asmlinkage int -do_rt_sigsuspend(sigset_t __user *uset, size_t sigsetsize, - struct pt_regs *regs, struct switch_stack *sw) -{ - sigset_t set; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&set, uset, sizeof(set))) - return -EFAULT; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - /* Indicate EINTR on return from any possible signal handler, - which will not come back through here, but via sigreturn. */ - regs->r0 = EINTR; - regs->r19 = 1; - current->state = TASK_INTERRUPTIBLE; schedule(); set_thread_flag(TIF_RESTORE_SIGMASK); @@ -239,6 +177,8 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long usp; long i, err = __get_user(regs->pc, &sc->sc_pc); + current_thread_info()->restart_block.fn = do_no_restart_syscall; + sw->r26 = (unsigned long) ret_from_sys_call; err |= __get_user(regs->r0, sc->sc_regs+0); @@ -591,7 +531,6 @@ syscall_restart(unsigned long r0, unsigned long r19, regs->pc -= 4; break; case ERESTART_RESTARTBLOCK: - current_thread_info()->restart_block.fn = do_no_restart_syscall; regs->r0 = EINTR; break; } diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index 4afc1a1e2e5..f0df3fbd840 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -87,7 +87,7 @@ static int srm_env_proc_show(struct seq_file *m, void *v) srm_env_t *entry; char *page; - entry = (srm_env_t *)m->private; + entry = m->private; page = (char *)__get_free_page(GFP_USER); if (!page) return -ENOMEM; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 09acb786e72..a6a1de9db16 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -58,7 +58,7 @@ sys_call_table: .quad sys_open /* 45 */ .quad alpha_ni_syscall .quad sys_getxgid - .quad osf_sigprocmask + .quad sys_osf_sigprocmask .quad alpha_ni_syscall .quad alpha_ni_syscall /* 50 */ .quad sys_acct @@ -512,6 +512,9 @@ sys_call_table: .quad sys_pwritev .quad sys_rt_tgsigqueueinfo .quad sys_perf_event_open + .quad sys_fanotify_init + .quad sys_fanotify_mark /* 495 */ + .quad sys_prlimit64 .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index eacceb26d9c..0f1d8493cfc 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -41,7 +41,7 @@ #include <linux/init.h> #include <linux/bcd.h> #include <linux/profile.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -83,25 +83,25 @@ static struct { unsigned long est_cycle_freq; -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 -void set_perf_event_pending(void) +void set_irq_work_pending(void) { - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ static inline __u32 rpcc(void) @@ -191,16 +191,16 @@ irqreturn_t timer_interrupt(int irq, void *dev) write_sequnlock(&xtime_lock); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + #ifndef CONFIG_SMP while (nticks--) update_process_times(user_mode(get_irq_regs())); #endif - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } - return IRQ_HANDLED; } diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index b14f015008a..0414e021a91 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -13,7 +13,6 @@ #include <linux/sched.h> #include <linux/tty.h> #include <linux/delay.h> -#include <linux/smp_lock.h> #include <linux/module.h> #include <linux/init.h> #include <linux/kallsyms.h> @@ -623,7 +622,6 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, return; } - lock_kernel(); printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", pc, va, opcode, reg); do_exit(SIGSEGV); @@ -646,7 +644,6 @@ got_exception: * Yikes! No one to forward the exception to. * Since the registers are in a weird format, dump them ourselves. */ - lock_kernel(); printk("%s(%d): unhandled unaligned exception\n", current->comm, task_pid_nr(current)); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 553b7cf17bf..9103904b3da 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -23,6 +23,7 @@ config ARM select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_REGS_AND_STACK_ACCESS_API @@ -271,7 +272,6 @@ config ARCH_AT91 bool "Atmel AT91" select ARCH_REQUIRE_GPIOLIB select HAVE_CLK - select ARCH_USES_GETTIMEOFFSET help This enables support for systems based on the Atmel AT91RM9200, AT91SAM9 and AT91CAP9 processors. @@ -1051,6 +1051,32 @@ config ARM_ERRATA_460075 ACTLR register. Note that setting specific bits in the ACTLR register may not be available in non-secure mode. +config ARM_ERRATA_742230 + bool "ARM errata: DMB operation may be faulty" + depends on CPU_V7 && SMP + help + This option enables the workaround for the 742230 Cortex-A9 + (r1p0..r2p2) erratum. Under rare circumstances, a DMB instruction + between two write operations may not ensure the correct visibility + ordering of the two writes. This workaround sets a specific bit in + the diagnostic register of the Cortex-A9 which causes the DMB + instruction to behave as a DSB, ensuring the correct behaviour of + the two writes. + +config ARM_ERRATA_742231 + bool "ARM errata: Incorrect hazard handling in the SCU may lead to data corruption" + depends on CPU_V7 && SMP + help + This option enables the workaround for the 742231 Cortex-A9 + (r2p0..r2p2) erratum. Under certain conditions, specific to the + Cortex-A9 MPCore micro-architecture, two CPUs working in SMP mode, + accessing some data located in the same cache line, may get corrupted + data due to bad handling of the address hazard when the line gets + replaced from one of the CPUs at the same time as another CPU is + accessing it. This workaround sets specific bits in the diagnostic + register of the Cortex-A9 which reduces the linefill issuing + capabilities of the processor. + config PL310_ERRATA_588369 bool "Clean & Invalidate maintenance operations do not invalidate clean lines" depends on CACHE_L2X0 && ARCH_OMAP4 @@ -1076,6 +1102,20 @@ config ARM_ERRATA_720789 invalidated are not, resulting in an incoherency in the system page tables. The workaround changes the TLB flushing routines to invalidate entries regardless of the ASID. + +config ARM_ERRATA_743622 + bool "ARM errata: Faulty hazard checking in the Store Buffer may lead to data corruption" + depends on CPU_V7 + help + This option enables the workaround for the 743622 Cortex-A9 + (r2p0..r2p2) erratum. Under very rare conditions, a faulty + optimisation in the Cortex-A9 Store Buffer may lead to data + corruption. This workaround sets a specific bit in the diagnostic + register of the Cortex-A9 which disables the Store Buffer + optimisation, preventing the defect from occurring. This has no + visible impact on the overall performance or power consumption of the + processor. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index b23f6bc46cf..65a7c1c588a 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -116,5 +116,5 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) -$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile .config +$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG) @sed "$(SEDFLAGS)" < $< > $@ diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 7974baacafc..1bec96e8519 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -271,6 +271,14 @@ int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= PHYS_OFFSET + SZ_64M - 1) + return 0; + + return -EIO; +} + int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { it8152_io.start = IT8152_IO_BASE + 0x12000; diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index b5799a3b711..c4aa4e8c6af 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,18 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* - * NOP: on *most* (read: all supported) ARM platforms, the performance - * counter interrupts are regular interrupts and not an NMI. This - * means that when we receive the interrupt we can call - * perf_event_do_pending() that handles all of the work with - * interrupts disabled. - */ -static inline void -set_perf_event_pending(void) -{ -} - /* ARM performance counters start from 1 (in the cp15 accesses) so use the * same indexes here for consistency. */ #define PERF_EVENT_INDEX_OFFSET 1 diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ab68cf1ef80..e90b167ea84 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -317,6 +317,10 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); #else #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 1b560825e1c..7885722bdf4 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -48,6 +48,8 @@ work_pending: beq no_work_pending mov r0, sp @ 'regs' mov r2, why @ 'syscall' + tst r1, #_TIF_SIGPENDING @ delivering a signal? + movne why, #0 @ prevent further restarts bl do_notify_resume b ret_slow_syscall @ Check work again diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c index 8bccbfa693f..2c1f0050c9c 100644 --- a/arch/arm/kernel/kprobes-decode.c +++ b/arch/arm/kernel/kprobes-decode.c @@ -1162,11 +1162,12 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi) { /* * MSR : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx - * Undef : cccc 0011 0x00 xxxx xxxx xxxx xxxx xxxx + * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx * ALU op with S bit and Rd == 15 : * cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx */ - if ((insn & 0x0f900000) == 0x03200000 || /* MSR & Undef */ + if ((insn & 0x0fb00000) == 0x03200000 || /* MSR */ + (insn & 0x0ff00000) == 0x03400000 || /* Undef */ (insn & 0x0e10f000) == 0x0210f000) /* ALU s-bit, R15 */ return INSN_REJECTED; @@ -1177,7 +1178,7 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi) * *S (bit 20) updates condition codes * ADC/SBC/RSC reads the C flag */ - insn &= 0xfff00fff; /* Rn = r0, Rd = r0 */ + insn &= 0xffff0fff; /* Rd = r0 */ asi->insn[0] = insn; asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ emulate_alu_imm_rwflags : emulate_alu_imm_rflags; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ecbb0288e5d..49643b1467e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -123,6 +123,12 @@ armpmu_get_max_events(void) } EXPORT_SYMBOL_GPL(armpmu_get_max_events); +int perf_num_counters(void) +{ + return armpmu_get_max_events(); +} +EXPORT_SYMBOL_GPL(perf_num_counters); + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) \ @@ -221,46 +227,56 @@ again: } static void -armpmu_disable(struct perf_event *event) +armpmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); - armpmu->disable(hwc, idx); - - barrier(); - armpmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; - perf_event_update_userpage(event); + armpmu_event_update(event, hwc, hwc->idx); } static void -armpmu_read(struct perf_event *event) +armpmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - /* Don't read disabled counters! */ - if (hwc->idx < 0) + if (!armpmu) return; - armpmu_event_update(event, hwc, hwc->idx); + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } } static void -armpmu_unthrottle(struct perf_event *event) +armpmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; /* * Set the period again. Some counters can't be stopped, so when we - * were throttled we simply disabled the IRQ source and the counter + * were stopped we simply disabled the IRQ source and the counter * may have been left counting. If we don't do this step then we may * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. @@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event) armpmu->enable(hwc, hwc->idx); } +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + clear_bit(idx, cpuc->active_mask); + armpmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + static int -armpmu_enable(struct perf_event *event) +armpmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; + perf_pmu_disable(event->pmu); + /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { @@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event) cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - /* Set the period for the event. */ - armpmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - armpmu->enable(hwc, idx); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); out: + perf_pmu_enable(event->pmu); return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -const struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > perf_max_events) { + if (atomic_read(&active_events) > armpmu->num_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } -void -hw_perf_enable(void) +static void armpmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -549,13 +586,23 @@ hw_perf_enable(void) armpmu->start(); } -void -hw_perf_disable(void) +static void armpmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } +static struct pmu pmu = { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, +}; + /* * ARMv6 Performance counter handling code. * @@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num, * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - perf_event_do_pending(); + irq_work_run(); return IRQ_HANDLED; } @@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - perf_event_do_pending(); + irq_work_run(); return IRQ_HANDLED; } @@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) armpmu->disable(hwc, idx); } - perf_event_do_pending(); + irq_work_run(); /* * Re-enable the PMU. @@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) armpmu->disable(hwc, idx); } - perf_event_do_pending(); + irq_work_run(); /* * Re-enable the PMU. @@ -2933,14 +2980,12 @@ init_hw_perf_events(void) armpmu = &armv6pmu; memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, sizeof(armv6_perf_cache_map)); - perf_max_events = armv6pmu.num_events; break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map, sizeof(armv6mpcore_perf_cache_map)); - perf_max_events = armv6mpcore_pmu.num_events; break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; @@ -2952,7 +2997,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; @@ -2964,7 +3008,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; } /* Intel CPUs [xscale]. */ @@ -2975,13 +3018,11 @@ init_hw_perf_events(void) armpmu = &xscale1pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale1pmu.num_events; break; case 2: armpmu = &xscale2pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale2pmu.num_events; break; } } @@ -2991,9 +3032,10 @@ init_hw_perf_events(void) arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); - perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); @@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); - - if (!user_mode(regs)) - regs = task_pt_regs(current); tail = (struct frame_tail *)regs->ARM_fp - 1; @@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 5e71ccd5e7d..1276babf84d 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -426,7 +426,7 @@ static struct i2c_gpio_platform_data pdata_i2c0 = { .sda_is_open_drain = 1, .scl_pin = AT91_PIN_PA21, .scl_is_open_drain = 1, - .udelay = 2, /* ~100 kHz */ + .udelay = 5, /* ~100 kHz */ }; static struct platform_device at91sam9g45_twi0_device = { @@ -440,7 +440,7 @@ static struct i2c_gpio_platform_data pdata_i2c1 = { .sda_is_open_drain = 1, .scl_pin = AT91_PIN_PB11, .scl_is_open_drain = 1, - .udelay = 2, /* ~100 kHz */ + .udelay = 5, /* ~100 kHz */ }; static struct platform_device at91sam9g45_twi1_device = { diff --git a/arch/arm/mach-at91/include/mach/system.h b/arch/arm/mach-at91/include/mach/system.h index c80e090b367..ee8db152592 100644 --- a/arch/arm/mach-at91/include/mach/system.h +++ b/arch/arm/mach-at91/include/mach/system.h @@ -28,17 +28,16 @@ static inline void arch_idle(void) { -#ifndef CONFIG_DEBUG_KERNEL /* * Disable the processor clock. The processor will be automatically * re-enabled by an interrupt or by a reset. */ at91_sys_write(AT91_PMC_SCDR, AT91_PMC_PCK); -#else +#ifndef CONFIG_CPU_ARM920T /* * Set the processor (CP15) into 'Wait for Interrupt' mode. - * Unlike disabling the processor clock via the PMC (above) - * this allows the processor to be woken via JTAG. + * Post-RM9200 processors need this in conjunction with the above + * to save power when idle. */ cpu_do_idle(); #endif diff --git a/arch/arm/mach-bcmring/dma.c b/arch/arm/mach-bcmring/dma.c index 29c0a911df2..77eb35c89cd 100644 --- a/arch/arm/mach-bcmring/dma.c +++ b/arch/arm/mach-bcmring/dma.c @@ -691,7 +691,7 @@ int dma_init(void) memset(&gDMA, 0, sizeof(gDMA)); - init_MUTEX_LOCKED(&gDMA.lock); + sema_init(&gDMA.lock, 0); init_waitqueue_head(&gDMA.freeChannelQ); /* Initialize the Hardware */ @@ -1574,7 +1574,7 @@ int dma_init_mem_map(DMA_MemMap_t *memMap) { memset(memMap, 0, sizeof(*memMap)); - init_MUTEX(&memMap->lock); + sema_init(&memMap->lock, 1); return 0; } diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 3d996b659ff..9be261beae7 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -769,8 +769,7 @@ static struct map_desc dm355_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00010000), .length = SZ_32K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 6b6f4c64370..7781e35daec 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -969,8 +969,7 @@ static struct map_desc dm365_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00010000), .length = SZ_32K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 40fec315c99..5e5b0a7831f 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -653,8 +653,7 @@ static struct map_desc dm644x_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00008000), .length = SZ_16K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index e4a3df1872a..26e8a9c7f50 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -737,8 +737,7 @@ static struct map_desc dm646x_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00010000), .length = SZ_32K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-dove/include/mach/io.h b/arch/arm/mach-dove/include/mach/io.h index 3b3e4721ce2..eb4936ff90a 100644 --- a/arch/arm/mach-dove/include/mach/io.h +++ b/arch/arm/mach-dove/include/mach/io.h @@ -13,8 +13,8 @@ #define IO_SPACE_LIMIT 0xffffffff -#define __io(a) ((void __iomem *)(((a) - DOVE_PCIE0_IO_PHYS_BASE) +\ - DOVE_PCIE0_IO_VIRT_BASE)) -#define __mem_pci(a) (a) +#define __io(a) ((void __iomem *)(((a) - DOVE_PCIE0_IO_BUS_BASE) + \ + DOVE_PCIE0_IO_VIRT_BASE)) +#define __mem_pci(a) (a) #endif diff --git a/arch/arm/mach-ep93xx/dma-m2p.c b/arch/arm/mach-ep93xx/dma-m2p.c index 8904ca4e2e2..a696d354b1f 100644 --- a/arch/arm/mach-ep93xx/dma-m2p.c +++ b/arch/arm/mach-ep93xx/dma-m2p.c @@ -276,7 +276,7 @@ static void channel_disable(struct m2p_channel *ch) v &= ~(M2P_CONTROL_STALL_IRQ_EN | M2P_CONTROL_NFB_IRQ_EN); m2p_set_control(ch, v); - while (m2p_channel_state(ch) == STATE_ON) + while (m2p_channel_state(ch) >= STATE_ON) cpu_relax(); m2p_set_control(ch, 0x0); diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index c5c0369bb48..2f7e2728970 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -122,6 +122,7 @@ config MACH_CPUIMX27 select IMX_HAVE_PLATFORM_IMX_I2C select IMX_HAVE_PLATFORM_IMX_UART select IMX_HAVE_PLATFORM_MXC_NAND + select MXC_ULPI if USB_ULPI help Include support for Eukrea CPUIMX27 platform. This includes specific configurations for the module and its peripherals. diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c index 339150ab0ea..6830afd1d2b 100644 --- a/arch/arm/mach-imx/mach-cpuimx27.c +++ b/arch/arm/mach-imx/mach-cpuimx27.c @@ -259,7 +259,7 @@ static void __init eukrea_cpuimx27_init(void) i2c_register_board_info(0, eukrea_cpuimx27_i2c_devices, ARRAY_SIZE(eukrea_cpuimx27_i2c_devices)); - imx27_add_i2c_imx1(&cpuimx27_i2c1_data); + imx27_add_i2c_imx0(&cpuimx27_i2c1_data); platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices)); diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 61cd4d64b98..24498a932ba 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -503,6 +503,14 @@ struct pci_bus * __devinit ixp4xx_scan_bus(int nr, struct pci_sys_data *sys) return pci_scan_bus(sys->busnr, &ixp4xx_ops, sys); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= SZ_64M - 1) + return 0; + + return -EIO; +} + EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); diff --git a/arch/arm/mach-ixp4xx/include/mach/hardware.h b/arch/arm/mach-ixp4xx/include/mach/hardware.h index f91ca6d4fbe..8138371c406 100644 --- a/arch/arm/mach-ixp4xx/include/mach/hardware.h +++ b/arch/arm/mach-ixp4xx/include/mach/hardware.h @@ -26,6 +26,8 @@ #define PCIBIOS_MAX_MEM 0x4BFFFFFF #endif +#define ARCH_HAS_DMA_SET_COHERENT_MASK + #define pcibios_assign_all_busses() 1 /* Register locations and bits */ diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h index 93fc2ec95e7..6e924b39891 100644 --- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h +++ b/arch/arm/mach-kirkwood/include/mach/kirkwood.h @@ -38,7 +38,7 @@ #define KIRKWOOD_PCIE1_IO_PHYS_BASE 0xf3000000 #define KIRKWOOD_PCIE1_IO_VIRT_BASE 0xfef00000 -#define KIRKWOOD_PCIE1_IO_BUS_BASE 0x00000000 +#define KIRKWOOD_PCIE1_IO_BUS_BASE 0x00100000 #define KIRKWOOD_PCIE1_IO_SIZE SZ_1M #define KIRKWOOD_PCIE_IO_PHYS_BASE 0xf2000000 diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c index 55e7f00836b..513ad3102d7 100644 --- a/arch/arm/mach-kirkwood/pcie.c +++ b/arch/arm/mach-kirkwood/pcie.c @@ -117,7 +117,7 @@ static void __init pcie0_ioresources_init(struct pcie_port *pp) * IORESOURCE_IO */ pp->res[0].name = "PCIe 0 I/O Space"; - pp->res[0].start = KIRKWOOD_PCIE_IO_PHYS_BASE; + pp->res[0].start = KIRKWOOD_PCIE_IO_BUS_BASE; pp->res[0].end = pp->res[0].start + KIRKWOOD_PCIE_IO_SIZE - 1; pp->res[0].flags = IORESOURCE_IO; @@ -139,7 +139,7 @@ static void __init pcie1_ioresources_init(struct pcie_port *pp) * IORESOURCE_IO */ pp->res[0].name = "PCIe 1 I/O Space"; - pp->res[0].start = KIRKWOOD_PCIE1_IO_PHYS_BASE; + pp->res[0].start = KIRKWOOD_PCIE1_IO_BUS_BASE; pp->res[0].end = pp->res[0].start + KIRKWOOD_PCIE1_IO_SIZE - 1; pp->res[0].flags = IORESOURCE_IO; diff --git a/arch/arm/mach-mmp/include/mach/system.h b/arch/arm/mach-mmp/include/mach/system.h index 4f5b0e0ce6c..1a8a25edb1b 100644 --- a/arch/arm/mach-mmp/include/mach/system.h +++ b/arch/arm/mach-mmp/include/mach/system.h @@ -9,6 +9,8 @@ #ifndef __ASM_MACH_SYSTEM_H #define __ASM_MACH_SYSTEM_H +#include <mach/cputype.h> + static inline void arch_idle(void) { cpu_do_idle(); @@ -16,6 +18,9 @@ static inline void arch_idle(void) static inline void arch_reset(char mode, const char *cmd) { - cpu_reset(0); + if (cpu_is_pxa168()) + cpu_reset(0xffff0000); + else + cpu_reset(0); } #endif /* __ASM_MACH_SYSTEM_H */ diff --git a/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/arch/arm/mach-pxa/cpufreq-pxa2xx.c index 50d5939a78f..58093d9e07b 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa2xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa2xx.c @@ -312,8 +312,7 @@ static int pxa_set_target(struct cpufreq_policy *policy, freqs.cpu = policy->cpu; if (freq_debug) - pr_debug(KERN_INFO "Changing CPU frequency to %d Mhz, " - "(SDRAM %d Mhz)\n", + pr_debug("Changing CPU frequency to %d Mhz, (SDRAM %d Mhz)\n", freqs.new / 1000, (pxa_freq_settings[idx].div2) ? (new_freq_mem / 2000) : (new_freq_mem / 1000)); diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h index 7f64d24cd56..814f1458a06 100644 --- a/arch/arm/mach-pxa/include/mach/hardware.h +++ b/arch/arm/mach-pxa/include/mach/hardware.h @@ -264,23 +264,35 @@ * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x * == 0x3 for pxa300/pxa310/pxa320 */ +#if defined(CONFIG_PXA25x) || defined(CONFIG_PXA27x) #define __cpu_is_pxa2xx(id) \ ({ \ unsigned int _id = (id) >> 13 & 0x7; \ _id <= 0x2; \ }) +#else +#define __cpu_is_pxa2xx(id) (0) +#endif +#ifdef CONFIG_PXA3xx #define __cpu_is_pxa3xx(id) \ ({ \ unsigned int _id = (id) >> 13 & 0x7; \ _id == 0x3; \ }) +#else +#define __cpu_is_pxa3xx(id) (0) +#endif +#if defined(CONFIG_CPU_PXA930) || defined(CONFIG_CPU_PXA935) #define __cpu_is_pxa93x(id) \ ({ \ unsigned int _id = (id) >> 4 & 0xfff; \ _id == 0x683 || _id == 0x693; \ }) +#else +#define __cpu_is_pxa93x(id) (0) +#endif #define cpu_is_pxa2xx() \ ({ \ @@ -309,7 +321,7 @@ extern unsigned long get_clock_tick_rate(void); #define PCIBIOS_MIN_IO 0 #define PCIBIOS_MIN_MEM 0 #define pcibios_assign_all_busses() 1 +#define ARCH_HAS_DMA_SET_COHERENT_MASK #endif - #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-pxa/include/mach/io.h b/arch/arm/mach-pxa/include/mach/io.h index 262691fb97d..fdca3be47d9 100644 --- a/arch/arm/mach-pxa/include/mach/io.h +++ b/arch/arm/mach-pxa/include/mach/io.h @@ -6,6 +6,8 @@ #ifndef __ASM_ARM_ARCH_IO_H #define __ASM_ARM_ARCH_IO_H +#include <mach/hardware.h> + #define IO_SPACE_LIMIT 0xffffffff /* diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c index 77ad6d34ab5..405b92a2979 100644 --- a/arch/arm/mach-pxa/palm27x.c +++ b/arch/arm/mach-pxa/palm27x.c @@ -469,9 +469,13 @@ static struct i2c_board_info __initdata palm27x_pi2c_board_info[] = { }, }; +static struct i2c_pxa_platform_data palm27x_i2c_power_info = { + .use_pio = 1, +}; + void __init palm27x_pmic_init(void) { i2c_register_board_info(1, ARRAY_AND_SIZE(palm27x_pi2c_board_info)); - pxa27x_set_i2c_power_info(NULL); + pxa27x_set_i2c_power_info(&palm27x_i2c_power_info); } #endif diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index c9b747cedea..37d6173bbb6 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -240,6 +240,7 @@ static void __init vpac270_onenand_init(void) {} #if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE) static struct pxamci_platform_data vpac270_mci_platform_data = { .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_power = -1, .gpio_card_detect = GPIO53_VPAC270_SD_DETECT_N, .gpio_card_ro = GPIO52_VPAC270_SD_READONLY, .detect_delay_ms = 200, diff --git a/arch/arm/mach-s3c64xx/dev-spi.c b/arch/arm/mach-s3c64xx/dev-spi.c index a492b982aa0..405e6212891 100644 --- a/arch/arm/mach-s3c64xx/dev-spi.c +++ b/arch/arm/mach-s3c64xx/dev-spi.c @@ -18,10 +18,11 @@ #include <mach/map.h> #include <mach/gpio-bank-c.h> #include <mach/spi-clocks.h> +#include <mach/irqs.h> #include <plat/s3c64xx-spi.h> #include <plat/gpio-cfg.h> -#include <plat/irqs.h> +#include <plat/devs.h> static char *spi_src_clks[] = { [S3C64XX_SPI_SRCCLK_PCLK] = "pclk", diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c index 5c07d013b23..e130379ba0e 100644 --- a/arch/arm/mach-s3c64xx/mach-real6410.c +++ b/arch/arm/mach-s3c64xx/mach-real6410.c @@ -30,73 +30,73 @@ #include <plat/devs.h> #include <plat/regs-serial.h> -#define UCON S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK -#define ULCON S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB -#define UFCON S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE +#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK) +#define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB) +#define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE) static struct s3c2410_uartcfg real6410_uartcfgs[] __initdata = { [0] = { - .hwport = 0, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 0, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, [1] = { - .hwport = 1, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 1, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, [2] = { - .hwport = 2, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 2, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, [3] = { - .hwport = 3, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 3, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, }; /* DM9000AEP 10/100 ethernet controller */ static struct resource real6410_dm9k_resource[] = { - [0] = { - .start = S3C64XX_PA_XM0CSN1, - .end = S3C64XX_PA_XM0CSN1 + 1, - .flags = IORESOURCE_MEM - }, - [1] = { - .start = S3C64XX_PA_XM0CSN1 + 4, - .end = S3C64XX_PA_XM0CSN1 + 5, - .flags = IORESOURCE_MEM - }, - [2] = { - .start = S3C_EINT(7), - .end = S3C_EINT(7), - .flags = IORESOURCE_IRQ, - } + [0] = { + .start = S3C64XX_PA_XM0CSN1, + .end = S3C64XX_PA_XM0CSN1 + 1, + .flags = IORESOURCE_MEM + }, + [1] = { + .start = S3C64XX_PA_XM0CSN1 + 4, + .end = S3C64XX_PA_XM0CSN1 + 5, + .flags = IORESOURCE_MEM + }, + [2] = { + .start = S3C_EINT(7), + .end = S3C_EINT(7), + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL + } }; static struct dm9000_plat_data real6410_dm9k_pdata = { - .flags = (DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM), + .flags = (DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM), }; static struct platform_device real6410_device_eth = { - .name = "dm9000", - .id = -1, - .num_resources = ARRAY_SIZE(real6410_dm9k_resource), - .resource = real6410_dm9k_resource, - .dev = { - .platform_data = &real6410_dm9k_pdata, - }, + .name = "dm9000", + .id = -1, + .num_resources = ARRAY_SIZE(real6410_dm9k_resource), + .resource = real6410_dm9k_resource, + .dev = { + .platform_data = &real6410_dm9k_pdata, + }, }; static struct platform_device *real6410_devices[] __initdata = { @@ -129,12 +129,12 @@ static void __init real6410_machine_init(void) /* set timing for nCS1 suitable for ethernet chip */ __raw_writel((0 << S3C64XX_SROM_BCX__PMC__SHIFT) | - (6 << S3C64XX_SROM_BCX__TACP__SHIFT) | - (4 << S3C64XX_SROM_BCX__TCAH__SHIFT) | - (1 << S3C64XX_SROM_BCX__TCOH__SHIFT) | - (13 << S3C64XX_SROM_BCX__TACC__SHIFT) | - (4 << S3C64XX_SROM_BCX__TCOS__SHIFT) | - (0 << S3C64XX_SROM_BCX__TACS__SHIFT), S3C64XX_SROM_BC1); + (6 << S3C64XX_SROM_BCX__TACP__SHIFT) | + (4 << S3C64XX_SROM_BCX__TCAH__SHIFT) | + (1 << S3C64XX_SROM_BCX__TCOH__SHIFT) | + (13 << S3C64XX_SROM_BCX__TACC__SHIFT) | + (4 << S3C64XX_SROM_BCX__TCOS__SHIFT) | + (0 << S3C64XX_SROM_BCX__TACS__SHIFT), S3C64XX_SROM_BC1); platform_add_devices(real6410_devices, ARRAY_SIZE(real6410_devices)); } diff --git a/arch/arm/mach-s5p6440/cpu.c b/arch/arm/mach-s5p6440/cpu.c index 526f33adb31..ec592e86605 100644 --- a/arch/arm/mach-s5p6440/cpu.c +++ b/arch/arm/mach-s5p6440/cpu.c @@ -19,6 +19,7 @@ #include <linux/sysdev.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/sched.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-s5p6442/cpu.c b/arch/arm/mach-s5p6442/cpu.c index a48fb553fd0..70ac681af72 100644 --- a/arch/arm/mach-s5p6442/cpu.c +++ b/arch/arm/mach-s5p6442/cpu.c @@ -19,6 +19,7 @@ #include <linux/sysdev.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/sched.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-s5pc100/cpu.c b/arch/arm/mach-s5pc100/cpu.c index 251c92ac5b2..cd1afbce83e 100644 --- a/arch/arm/mach-s5pc100/cpu.c +++ b/arch/arm/mach-s5pc100/cpu.c @@ -21,6 +21,7 @@ #include <linux/sysdev.h> #include <linux/serial_core.h> #include <linux/platform_device.h> +#include <linux/sched.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index af91fefef2c..d562670e1b0 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -173,11 +173,6 @@ static int s5pv210_clk_ip3_ctrl(struct clk *clk, int enable) return s5p_gatectrl(S5P_CLKGATE_IP3, clk, enable); } -static int s5pv210_clk_ip4_ctrl(struct clk *clk, int enable) -{ - return s5p_gatectrl(S5P_CLKGATE_IP4, clk, enable); -} - static int s5pv210_clk_mask0_ctrl(struct clk *clk, int enable) { return s5p_gatectrl(S5P_CLK_SRC_MASK0, clk, enable); @@ -281,6 +276,24 @@ static struct clk init_clocks_disable[] = { .enable = s5pv210_clk_ip0_ctrl, .ctrlbit = (1<<29), }, { + .name = "fimc", + .id = 0, + .parent = &clk_hclk_dsys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 24), + }, { + .name = "fimc", + .id = 1, + .parent = &clk_hclk_dsys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 25), + }, { + .name = "fimc", + .id = 2, + .parent = &clk_hclk_dsys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 26), + }, { .name = "otg", .id = -1, .parent = &clk_hclk_psys.clk, @@ -357,7 +370,7 @@ static struct clk init_clocks_disable[] = { .id = 1, .parent = &clk_pclk_psys.clk, .enable = s5pv210_clk_ip3_ctrl, - .ctrlbit = (1<<8), + .ctrlbit = (1 << 10), }, { .name = "i2c", .id = 2, diff --git a/arch/arm/mach-s5pv210/cpu.c b/arch/arm/mach-s5pv210/cpu.c index b9f4d677cf5..245b82b53df 100644 --- a/arch/arm/mach-s5pv210/cpu.c +++ b/arch/arm/mach-s5pv210/cpu.c @@ -19,6 +19,7 @@ #include <linux/io.h> #include <linux/sysdev.h> #include <linux/platform_device.h> +#include <linux/sched.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> @@ -47,7 +48,7 @@ static struct map_desc s5pv210_iodesc[] __initdata = { { .virtual = (unsigned long)S5P_VA_SYSTIMER, .pfn = __phys_to_pfn(S5PV210_PA_SYSTIMER), - .length = SZ_1M, + .length = SZ_4K, .type = MT_DEVICE, }, { .virtual = (unsigned long)VA_VIC2, diff --git a/arch/arm/mach-u300/include/mach/gpio.h b/arch/arm/mach-u300/include/mach/gpio.h index 7b1fc984abb..d5a71abcbae 100644 --- a/arch/arm/mach-u300/include/mach/gpio.h +++ b/arch/arm/mach-u300/include/mach/gpio.h @@ -273,6 +273,9 @@ extern void gpio_pullup(unsigned gpio, int value); extern int gpio_get_value(unsigned gpio); extern void gpio_set_value(unsigned gpio, int value); +#define gpio_get_value_cansleep gpio_get_value +#define gpio_set_value_cansleep gpio_set_value + /* wrappers to sleep-enable the previous two functions */ static inline unsigned gpio_to_irq(unsigned gpio) { diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index 577df6cccb0..71fb1734952 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -68,7 +68,7 @@ static void __init ct_ca9x4_init_irq(void) } #if 0 -static void ct_ca9x4_timer_init(void) +static void __init ct_ca9x4_timer_init(void) { writel(0, MMIO_P2V(CT_CA9X4_TIMER0) + TIMER_CTRL); writel(0, MMIO_P2V(CT_CA9X4_TIMER1) + TIMER_CTRL); @@ -222,12 +222,18 @@ static struct platform_device pmu_device = { .resource = pmu_resources, }; -static void ct_ca9x4_init(void) +static void __init ct_ca9x4_init(void) { int i; #ifdef CONFIG_CACHE_L2X0 - l2x0_init(MMIO_P2V(CT_CA9X4_L2CC), 0x00000000, 0xfe0fffff); + void __iomem *l2x0_base = MMIO_P2V(CT_CA9X4_L2CC); + + /* set RAM latencies to 1 cycle for this core tile. */ + writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL); + writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL); + + l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff); #endif clkdev_add_table(lookups, ARRAY_SIZE(lookups)); diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 817f0ad38a0..7eaa232180a 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -48,7 +48,7 @@ void __init v2m_map_io(struct map_desc *tile, size_t num) } -static void v2m_timer_init(void) +static void __init v2m_timer_init(void) { writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL); writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index d073b64ae87..724ba3bce72 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -885,8 +885,23 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (ai_usermode & UM_SIGNAL) force_sig(SIGBUS, current); - else - set_cr(cr_no_alignment); + else { + /* + * We're about to disable the alignment trap and return to + * user space. But if an interrupt occurs before actually + * reaching user space, then the IRQ vector entry code will + * notice that we were still in kernel space and therefore + * the alignment trap won't be re-enabled in that case as it + * is presumed to be always on from kernel space. + * Let's prevent that race by disabling interrupts here (they + * are disabled on the way back to user space anyway in + * entry-common.S) and disable the alignment trap only if + * there is no work pending for this thread. + */ + raw_local_irq_disable(); + if (!(current_thread_info()->flags & _TIF_WORK_MASK)) + set_cr(cr_no_alignment); + } return 0; } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index ab506272b2d..17e7b0b57e4 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -204,8 +204,12 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, /* * Don't allow RAM to be mapped - this causes problems with ARMv6+ */ - if (WARN_ON(pfn_valid(pfn))) - return NULL; + if (pfn_valid(pfn)) { + printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory. This leads\n" + KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n" + KERN_WARNING "will fail in the next kernel release. Please fix your driver.\n"); + WARN_ON(1); + } type = get_mem_type(mtype); if (!type) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6e1c4f6a2b3..e8ed9dc461f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -15,6 +15,7 @@ #include <linux/nodemask.h> #include <linux/memblock.h> #include <linux/sort.h> +#include <linux/fs.h> #include <asm/cputype.h> #include <asm/sections.h> @@ -246,6 +247,9 @@ static struct mem_type mem_types[] = { .domain = DOMAIN_USER, }, [MT_MEMORY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_WRITE | L_PTE_EXEC, + .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, @@ -254,6 +258,9 @@ static struct mem_type mem_types[] = { .domain = DOMAIN_KERNEL, }, [MT_MEMORY_NONCACHED] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_WRITE | L_PTE_EXEC | L_PTE_MT_BUFFERABLE, + .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, @@ -411,9 +418,12 @@ static void __init build_mem_type_table(void) * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) */ - if (arch_is_coherent() && cpu_is_xsc3()) + if (arch_is_coherent() && cpu_is_xsc3()) { mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; + } /* * ARMv6 and above have extended page tables. */ @@ -438,7 +448,9 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; #endif } @@ -475,6 +487,8 @@ static void __init build_mem_type_table(void) mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; switch (cp->pmd) { @@ -498,6 +512,19 @@ static void __init build_mem_type_table(void) } } +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); +#endif + #define vectors_base() (vectors_high() ? 0xffff0000 : 0) static void __init *early_alloc(unsigned long sz) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 6a8506d99ee..197f21bed5e 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -186,13 +186,14 @@ cpu_v7_name: * It is assumed that: * - cache type register is implemented */ -__v7_setup: +__v7_ca9mp_setup: #ifdef CONFIG_SMP mrc p15, 0, r0, c1, c0, 1 tst r0, #(1 << 6) @ SMP/nAMP mode enabled? orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting #endif +__v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} bl v7_flush_dcache_all @@ -201,11 +202,16 @@ __v7_setup: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? teq r10, #0x41000000 - bne 2f + bne 3f and r5, r0, #0x00f00000 @ variant and r6, r0, #0x0000000f @ revision - orr r0, r6, r5, lsr #20-4 @ combine variant and revision + orr r6, r6, r5, lsr #20-4 @ combine variant and revision + ubfx r0, r0, #4, #12 @ primary part number + /* Cortex-A8 Errata */ + ldr r10, =0x00000c08 @ Cortex-A8 primary part number + teq r0, r10 + bne 2f #ifdef CONFIG_ARM_ERRATA_430973 teq r5, #0x00100000 @ only present in r1p* mrceq p15, 0, r10, c1, c0, 1 @ read aux control register @@ -213,21 +219,50 @@ __v7_setup: mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_458693 - teq r0, #0x20 @ only present in r2p0 + teq r6, #0x20 @ only present in r2p0 mrceq p15, 0, r10, c1, c0, 1 @ read aux control register orreq r10, r10, #(1 << 5) @ set L1NEON to 1 orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_460075 - teq r0, #0x20 @ only present in r2p0 + teq r6, #0x20 @ only present in r2p0 mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register tsteq r10, #1 << 22 orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register #endif + b 3f -2: mov r10, #0 + /* Cortex-A9 Errata */ +2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + teq r0, r10 + bne 3f +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 4 @ set bit #4 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 12 @ set bit #12 + orreq r10, r10, #1 << 22 @ set bit #22 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 6 @ set bit #6 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif + +3: mov r10, #0 #ifdef HARVARD_CACHE mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #endif @@ -323,6 +358,29 @@ cpu_elf_name: .section ".proc.info.init", #alloc, #execinstr + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 @ Required ID value + .long 0xff0ffff0 @ Mask for ID + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS + .long PMD_TYPE_SECT | \ + PMD_SECT_XN | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __v7_ca9mp_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS + .long cpu_v7_name + .long v7_processor_functions + .long v7wbi_tlb_fns + .long v6_user_fns + .long v7_cache_fns + .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + /* * Match any ARMv7 processor core. */ diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile index e666eafed15..b2215c61cdf 100644 --- a/arch/arm/oprofile/Makefile +++ b/arch/arm/oprofile/Makefile @@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) +ifeq ($(CONFIG_HW_PERF_EVENTS),y) +DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o) +endif + oprofile-y := $(DRIVER_OBJS) common.o diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 0691176899f..8aa974491df 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -25,138 +25,10 @@ #include <asm/ptrace.h> #ifdef CONFIG_HW_PERF_EVENTS -/* - * Per performance monitor configuration as set via oprofilefs. - */ -struct op_counter_config { - unsigned long count; - unsigned long enabled; - unsigned long event; - unsigned long unit_mask; - unsigned long kernel; - unsigned long user; - struct perf_event_attr attr; -}; - -static int op_arm_enabled; -static DEFINE_MUTEX(op_arm_mutex); - -static struct op_counter_config *counter_config; -static struct perf_event **perf_events[nr_cpumask_bits]; -static int perf_num_counters; - -/* - * Overflow callback for oprofile. - */ -static void op_overflow_handler(struct perf_event *event, int unused, - struct perf_sample_data *data, struct pt_regs *regs) -{ - int id; - u32 cpu = smp_processor_id(); - - for (id = 0; id < perf_num_counters; ++id) - if (perf_events[cpu][id] == event) - break; - - if (id != perf_num_counters) - oprofile_add_sample(regs, id); - else - pr_warning("oprofile: ignoring spurious overflow " - "on cpu %u\n", cpu); -} - -/* - * Called by op_arm_setup to create perf attributes to mirror the oprofile - * settings in counter_config. Attributes are created as `pinned' events and - * so are permanently scheduled on the PMU. - */ -static void op_perf_setup(void) -{ - int i; - u32 size = sizeof(struct perf_event_attr); - struct perf_event_attr *attr; - - for (i = 0; i < perf_num_counters; ++i) { - attr = &counter_config[i].attr; - memset(attr, 0, size); - attr->type = PERF_TYPE_RAW; - attr->size = size; - attr->config = counter_config[i].event; - attr->sample_period = counter_config[i].count; - attr->pinned = 1; - } -} - -static int op_create_counter(int cpu, int event) -{ - int ret = 0; - struct perf_event *pevent; - - if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL)) - return ret; - - pevent = perf_event_create_kernel_counter(&counter_config[event].attr, - cpu, -1, - op_overflow_handler); - - if (IS_ERR(pevent)) { - ret = PTR_ERR(pevent); - } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) { - pr_warning("oprofile: failed to enable event %d " - "on CPU %d\n", event, cpu); - ret = -EBUSY; - } else { - perf_events[cpu][event] = pevent; - } - - return ret; -} - -static void op_destroy_counter(int cpu, int event) -{ - struct perf_event *pevent = perf_events[cpu][event]; - - if (pevent) { - perf_event_release_kernel(pevent); - perf_events[cpu][event] = NULL; - } -} - -/* - * Called by op_arm_start to create active perf events based on the - * perviously configured attributes. - */ -static int op_perf_start(void) -{ - int cpu, event, ret = 0; - - for_each_online_cpu(cpu) { - for (event = 0; event < perf_num_counters; ++event) { - ret = op_create_counter(cpu, event); - if (ret) - goto out; - } - } - -out: - return ret; -} - -/* - * Called by op_arm_stop at the end of a profiling run. - */ -static void op_perf_stop(void) +char *op_name_from_perf_id(void) { - int cpu, event; + enum arm_perf_pmu_ids id = armpmu_get_pmu_id(); - for_each_online_cpu(cpu) - for (event = 0; event < perf_num_counters; ++event) - op_destroy_counter(cpu, event); -} - - -static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) -{ switch (id) { case ARM_PERF_PMU_ID_XSCALE1: return "arm/xscale1"; @@ -175,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) } } -static int op_arm_create_files(struct super_block *sb, struct dentry *root) -{ - unsigned int i; - - for (i = 0; i < perf_num_counters; i++) { - struct dentry *dir; - char buf[4]; - - snprintf(buf, sizeof buf, "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); - oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); - oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); - oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); - } - - return 0; -} - -static int op_arm_setup(void) -{ - spin_lock(&oprofilefs_lock); - op_perf_setup(); - spin_unlock(&oprofilefs_lock); - return 0; -} - -static int op_arm_start(void) -{ - int ret = -EBUSY; - - mutex_lock(&op_arm_mutex); - if (!op_arm_enabled) { - ret = 0; - op_perf_start(); - op_arm_enabled = 1; - } - mutex_unlock(&op_arm_mutex); - return ret; -} - -static void op_arm_stop(void) -{ - mutex_lock(&op_arm_mutex); - if (op_arm_enabled) - op_perf_stop(); - op_arm_enabled = 0; - mutex_unlock(&op_arm_mutex); -} - -#ifdef CONFIG_PM -static int op_arm_suspend(struct platform_device *dev, pm_message_t state) -{ - mutex_lock(&op_arm_mutex); - if (op_arm_enabled) - op_perf_stop(); - mutex_unlock(&op_arm_mutex); - return 0; -} - -static int op_arm_resume(struct platform_device *dev) -{ - mutex_lock(&op_arm_mutex); - if (op_arm_enabled && op_perf_start()) - op_arm_enabled = 0; - mutex_unlock(&op_arm_mutex); - return 0; -} - -static struct platform_driver oprofile_driver = { - .driver = { - .name = "arm-oprofile", - }, - .resume = op_arm_resume, - .suspend = op_arm_suspend, -}; - -static struct platform_device *oprofile_pdev; - -static int __init init_driverfs(void) -{ - int ret; - - ret = platform_driver_register(&oprofile_driver); - if (ret) - goto out; - - oprofile_pdev = platform_device_register_simple( - oprofile_driver.driver.name, 0, NULL, 0); - if (IS_ERR(oprofile_pdev)) { - ret = PTR_ERR(oprofile_pdev); - platform_driver_unregister(&oprofile_driver); - } - -out: - return ret; -} - -static void exit_driverfs(void) -{ - platform_device_unregister(oprofile_pdev); - platform_driver_unregister(&oprofile_driver); -} -#else -static int __init init_driverfs(void) { return 0; } -#define exit_driverfs() do { } while (0) -#endif /* CONFIG_PM */ - static int report_trace(struct stackframe *frame, void *d) { unsigned int *depth = d; @@ -349,72 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { - int cpu, ret = 0; - - perf_num_counters = armpmu_get_max_events(); - - counter_config = kcalloc(perf_num_counters, - sizeof(struct op_counter_config), GFP_KERNEL); - - if (!counter_config) { - pr_info("oprofile: failed to allocate %d " - "counters\n", perf_num_counters); - return -ENOMEM; - } - - ret = init_driverfs(); - if (ret) { - kfree(counter_config); - return ret; - } - - for_each_possible_cpu(cpu) { - perf_events[cpu] = kcalloc(perf_num_counters, - sizeof(struct perf_event *), GFP_KERNEL); - if (!perf_events[cpu]) { - pr_info("oprofile: failed to allocate %d perf events " - "for cpu %d\n", perf_num_counters, cpu); - while (--cpu >= 0) - kfree(perf_events[cpu]); - return -ENOMEM; - } - } - ops->backtrace = arm_backtrace; - ops->create_files = op_arm_create_files; - ops->setup = op_arm_setup; - ops->start = op_arm_start; - ops->stop = op_arm_stop; - ops->shutdown = op_arm_stop; - ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id()); - - if (!ops->cpu_type) - ret = -ENODEV; - else - pr_info("oprofile: using %s\n", ops->cpu_type); - return ret; + return oprofile_perf_init(ops); } -void oprofile_arch_exit(void) +void __exit oprofile_arch_exit(void) { - int cpu, id; - struct perf_event *event; - - if (*perf_events) { - exit_driverfs(); - for_each_possible_cpu(cpu) { - for (id = 0; id < perf_num_counters; ++id) { - event = perf_events[cpu][id]; - if (event != NULL) - perf_event_release_kernel(event); - } - kfree(perf_events[cpu]); - } - } - - if (counter_config) - kfree(counter_config); + oprofile_perf_exit(); } #else int __init oprofile_arch_init(struct oprofile_operations *ops) @@ -422,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) pr_info("oprofile: hardware counters not available\n"); return -ENODEV; } -void oprofile_arch_exit(void) {} +void __exit oprofile_arch_exit(void) {} #endif /* CONFIG_HW_PERF_EVENTS */ diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index ea3ca86c528..aedf9c1d645 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -1,5 +1,5 @@ /* - * linux/arch/arm/mach-nomadik/timer.c + * linux/arch/arm/plat-nomadik/timer.c * * Copyright (C) 2008 STMicroelectronics * Copyright (C) 2010 Alessandro Rubini @@ -75,7 +75,7 @@ static void nmdk_clkevt_mode(enum clock_event_mode mode, cr = readl(mtu_base + MTU_CR(1)); writel(0, mtu_base + MTU_LR(1)); writel(cr | MTU_CRn_ENA, mtu_base + MTU_CR(1)); - writel(0x2, mtu_base + MTU_IMSC); + writel(1 << 1, mtu_base + MTU_IMSC); break; case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_UNUSED: @@ -131,25 +131,23 @@ void __init nmdk_timer_init(void) { unsigned long rate; struct clk *clk0; - struct clk *clk1; - u32 cr; + u32 cr = MTU_CRn_32BITS; clk0 = clk_get_sys("mtu0", NULL); BUG_ON(IS_ERR(clk0)); - clk1 = clk_get_sys("mtu1", NULL); - BUG_ON(IS_ERR(clk1)); - clk_enable(clk0); - clk_enable(clk1); /* - * Tick rate is 2.4MHz for Nomadik and 110MHz for ux500: - * use a divide-by-16 counter if it's more than 16MHz + * Tick rate is 2.4MHz for Nomadik and 2.4Mhz, 100MHz or 133 MHz + * for ux500. + * Use a divide-by-16 counter if the tick rate is more than 32MHz. + * At 32 MHz, the timer (with 32 bit counter) can be programmed + * to wake-up at a max 127s a head in time. Dividing a 2.4 MHz timer + * with 16 gives too low timer resolution. */ - cr = MTU_CRn_32BITS;; rate = clk_get_rate(clk0); - if (rate > 16 << 20) { + if (rate > 32000000) { rate /= 16; cr |= MTU_CRn_PRESCALE_16; } else { @@ -170,15 +168,8 @@ void __init nmdk_timer_init(void) pr_err("timer: failed to initialize clock source %s\n", nmdk_clksrc.name); - /* Timer 1 is used for events, fix according to rate */ - cr = MTU_CRn_32BITS; - rate = clk_get_rate(clk1); - if (rate > 16 << 20) { - rate /= 16; - cr |= MTU_CRn_PRESCALE_16; - } else { - cr |= MTU_CRn_PRESCALE_1; - } + /* Timer 1 is used for events */ + clockevents_calc_mult_shift(&nmdk_clkevt, rate, MTU_MIN_RANGE); writel(cr | MTU_CRn_ONESHOT, mtu_base + MTU_CR(1)); /* off, currently */ diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index e39a417a368..a92cb499313 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -33,7 +33,7 @@ config OMAP_DEBUG_DEVICES config OMAP_DEBUG_LEDS bool depends on OMAP_DEBUG_DEVICES - default y if LEDS + default y if LEDS_CLASS config OMAP_RESET_CLOCKS bool "Reset unused clocks during boot" diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c index a202a2ce6e3..6cd151b31bc 100644 --- a/arch/arm/plat-omap/iommu.c +++ b/arch/arm/plat-omap/iommu.c @@ -320,6 +320,7 @@ void flush_iotlb_page(struct iommu *obj, u32 da) if ((start <= da) && (da < start + bytes)) { dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n", __func__, start, da, bytes); + iotlb_load_cr(obj, &cr); iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY); } } diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index e31496e35b0..0c8612fd831 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -156,7 +156,7 @@ static irqreturn_t omap_mcbsp_rx_irq_handler(int irq, void *dev_id) /* Writing zero to RSYNC_ERR clears the IRQ */ MCBSP_WRITE(mcbsp_rx, SPCR1, MCBSP_READ_CACHE(mcbsp_rx, SPCR1)); } else { - complete(&mcbsp_rx->tx_irq_completion); + complete(&mcbsp_rx->rx_irq_completion); } return IRQ_HANDLED; diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 226b2e858d6..10b3b4c6337 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -220,20 +220,7 @@ void __init omap_map_sram(void) if (omap_sram_size == 0) return; - if (cpu_is_omap24xx()) { - omap_sram_io_desc[0].virtual = OMAP2_SRAM_VA; - - base = OMAP2_SRAM_PA; - base = ROUND_DOWN(base, PAGE_SIZE); - omap_sram_io_desc[0].pfn = __phys_to_pfn(base); - } - if (cpu_is_omap34xx()) { - omap_sram_io_desc[0].virtual = OMAP3_SRAM_VA; - base = OMAP3_SRAM_PA; - base = ROUND_DOWN(base, PAGE_SIZE); - omap_sram_io_desc[0].pfn = __phys_to_pfn(base); - /* * SRAM must be marked as non-cached on OMAP3 since the * CORE DPLL M2 divider change code (in SRAM) runs with the @@ -244,13 +231,11 @@ void __init omap_map_sram(void) omap_sram_io_desc[0].type = MT_MEMORY_NONCACHED; } - if (cpu_is_omap44xx()) { - omap_sram_io_desc[0].virtual = OMAP4_SRAM_VA; - base = OMAP4_SRAM_PA; - base = ROUND_DOWN(base, PAGE_SIZE); - omap_sram_io_desc[0].pfn = __phys_to_pfn(base); - } - omap_sram_io_desc[0].length = 1024 * 1024; /* Use section desc */ + omap_sram_io_desc[0].virtual = omap_sram_base; + base = omap_sram_start; + base = ROUND_DOWN(base, PAGE_SIZE); + omap_sram_io_desc[0].pfn = __phys_to_pfn(base); + omap_sram_io_desc[0].length = ROUND_DOWN(omap_sram_size, PAGE_SIZE); iotable_init(omap_sram_io_desc, ARRAY_SIZE(omap_sram_io_desc)); printk(KERN_INFO "SRAM: Mapped pa 0x%08lx to va 0x%08lx size: 0x%lx\n", diff --git a/arch/arm/plat-s5p/dev-fimc0.c b/arch/arm/plat-s5p/dev-fimc0.c index d3f1a9b5d2b..608770fc153 100644 --- a/arch/arm/plat-s5p/dev-fimc0.c +++ b/arch/arm/plat-s5p/dev-fimc0.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/dma-mapping.h> #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/ioport.h> @@ -18,7 +19,7 @@ static struct resource s5p_fimc0_resource[] = { [0] = { .start = S5P_PA_FIMC0, - .end = S5P_PA_FIMC0 + SZ_1M - 1, + .end = S5P_PA_FIMC0 + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -28,9 +29,15 @@ static struct resource s5p_fimc0_resource[] = { }, }; +static u64 s5p_fimc0_dma_mask = DMA_BIT_MASK(32); + struct platform_device s5p_device_fimc0 = { .name = "s5p-fimc", .id = 0, .num_resources = ARRAY_SIZE(s5p_fimc0_resource), .resource = s5p_fimc0_resource, + .dev = { + .dma_mask = &s5p_fimc0_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, }; diff --git a/arch/arm/plat-s5p/dev-fimc1.c b/arch/arm/plat-s5p/dev-fimc1.c index 41bd6986d0a..76e3a97a87d 100644 --- a/arch/arm/plat-s5p/dev-fimc1.c +++ b/arch/arm/plat-s5p/dev-fimc1.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/dma-mapping.h> #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/ioport.h> @@ -18,7 +19,7 @@ static struct resource s5p_fimc1_resource[] = { [0] = { .start = S5P_PA_FIMC1, - .end = S5P_PA_FIMC1 + SZ_1M - 1, + .end = S5P_PA_FIMC1 + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -28,9 +29,15 @@ static struct resource s5p_fimc1_resource[] = { }, }; +static u64 s5p_fimc1_dma_mask = DMA_BIT_MASK(32); + struct platform_device s5p_device_fimc1 = { .name = "s5p-fimc", .id = 1, .num_resources = ARRAY_SIZE(s5p_fimc1_resource), .resource = s5p_fimc1_resource, + .dev = { + .dma_mask = &s5p_fimc1_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, }; diff --git a/arch/arm/plat-s5p/dev-fimc2.c b/arch/arm/plat-s5p/dev-fimc2.c index dfddeda6d4a..24d29816fa2 100644 --- a/arch/arm/plat-s5p/dev-fimc2.c +++ b/arch/arm/plat-s5p/dev-fimc2.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/dma-mapping.h> #include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/ioport.h> @@ -18,7 +19,7 @@ static struct resource s5p_fimc2_resource[] = { [0] = { .start = S5P_PA_FIMC2, - .end = S5P_PA_FIMC2 + SZ_1M - 1, + .end = S5P_PA_FIMC2 + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -28,9 +29,15 @@ static struct resource s5p_fimc2_resource[] = { }, }; +static u64 s5p_fimc2_dma_mask = DMA_BIT_MASK(32); + struct platform_device s5p_device_fimc2 = { .name = "s5p-fimc", .id = 2, .num_resources = ARRAY_SIZE(s5p_fimc2_resource), .resource = s5p_fimc2_resource, + .dev = { + .dma_mask = &s5p_fimc2_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, }; diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c index 04d9521ddc9..e8f2be2d67f 100644 --- a/arch/arm/plat-samsung/adc.c +++ b/arch/arm/plat-samsung/adc.c @@ -435,7 +435,6 @@ static int s3c_adc_suspend(struct platform_device *pdev, pm_message_t state) static int s3c_adc_resume(struct platform_device *pdev) { struct adc_device *adc = platform_get_drvdata(pdev); - unsigned long flags; clk_enable(adc->clk); enable_irq(adc->irq); diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c index 90a20512d68..e8d20b0bc50 100644 --- a/arch/arm/plat-samsung/clock.c +++ b/arch/arm/plat-samsung/clock.c @@ -48,6 +48,9 @@ #include <plat/clock.h> #include <plat/cpu.h> +#include <linux/serial_core.h> +#include <plat/regs-serial.h> /* for s3c24xx_uart_devs */ + /* clock information */ static LIST_HEAD(clocks); @@ -65,6 +68,28 @@ static int clk_null_enable(struct clk *clk, int enable) return 0; } +static int dev_is_s3c_uart(struct device *dev) +{ + struct platform_device **pdev = s3c24xx_uart_devs; + int i; + for (i = 0; i < ARRAY_SIZE(s3c24xx_uart_devs); i++, pdev++) + if (*pdev && dev == &(*pdev)->dev) + return 1; + return 0; +} + +/* + * Serial drivers call get_clock() very early, before platform bus + * has been set up, this requires a special check to let them get + * a proper clock + */ + +static int dev_is_platform_device(struct device *dev) +{ + return dev->bus == &platform_bus_type || + (dev->bus == NULL && dev_is_s3c_uart(dev)); +} + /* Clock API calls */ struct clk *clk_get(struct device *dev, const char *id) @@ -73,7 +98,7 @@ struct clk *clk_get(struct device *dev, const char *id) struct clk *clk = ERR_PTR(-ENOENT); int idno; - if (dev == NULL || dev->bus != &platform_bus_type) + if (dev == NULL || !dev_is_platform_device(dev)) idno = -1; else idno = to_platform_device(dev)->id; diff --git a/arch/arm/plat-samsung/gpio-config.c b/arch/arm/plat-samsung/gpio-config.c index 57b68a50f45..e3d41eaed1f 100644 --- a/arch/arm/plat-samsung/gpio-config.c +++ b/arch/arm/plat-samsung/gpio-config.c @@ -273,13 +273,13 @@ s5p_gpio_drvstr_t s5p_gpio_get_drvstr(unsigned int pin) if (!chip) return -EINVAL; - off = chip->chip.base - pin; + off = pin - chip->chip.base; shift = off * 2; reg = chip->base + 0x0C; drvstr = __raw_readl(reg); - drvstr = 0xffff & (0x3 << shift); drvstr = drvstr >> shift; + drvstr &= 0x3; return (__force s5p_gpio_drvstr_t)drvstr; } @@ -296,11 +296,12 @@ int s5p_gpio_set_drvstr(unsigned int pin, s5p_gpio_drvstr_t drvstr) if (!chip) return -EINVAL; - off = chip->chip.base - pin; + off = pin - chip->chip.base; shift = off * 2; reg = chip->base + 0x0C; tmp = __raw_readl(reg); + tmp &= ~(0x3 << shift); tmp |= drvstr << shift; __raw_writel(tmp, reg); diff --git a/arch/arm/plat-samsung/include/plat/gpio-cfg.h b/arch/arm/plat-samsung/include/plat/gpio-cfg.h index db4112c6f2b..1c6b92947c5 100644 --- a/arch/arm/plat-samsung/include/plat/gpio-cfg.h +++ b/arch/arm/plat-samsung/include/plat/gpio-cfg.h @@ -143,12 +143,12 @@ extern s3c_gpio_pull_t s3c_gpio_getpull(unsigned int pin); /* Define values for the drvstr available for each gpio pin. * * These values control the value of the output signal driver strength, - * configurable on most pins on the S5C series. + * configurable on most pins on the S5P series. */ -#define S5P_GPIO_DRVSTR_LV1 ((__force s5p_gpio_drvstr_t)0x00) -#define S5P_GPIO_DRVSTR_LV2 ((__force s5p_gpio_drvstr_t)0x01) -#define S5P_GPIO_DRVSTR_LV3 ((__force s5p_gpio_drvstr_t)0x10) -#define S5P_GPIO_DRVSTR_LV4 ((__force s5p_gpio_drvstr_t)0x11) +#define S5P_GPIO_DRVSTR_LV1 ((__force s5p_gpio_drvstr_t)0x0) +#define S5P_GPIO_DRVSTR_LV2 ((__force s5p_gpio_drvstr_t)0x2) +#define S5P_GPIO_DRVSTR_LV3 ((__force s5p_gpio_drvstr_t)0x1) +#define S5P_GPIO_DRVSTR_LV4 ((__force s5p_gpio_drvstr_t)0x3) /** * s5c_gpio_get_drvstr() - get the driver streght value of a gpio pin diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d041d9..a727f54d64d 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 16399bd2499..0f2417df632 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,6 +7,7 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS config ZONE_DMA diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 0974c0ecc59..bab01298b58 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -121,6 +121,9 @@ static int restore_sigcontext(struct sigcontext __user *sc, int *_gr8) struct user_context *user = current->thread.user; unsigned long tbr, psr; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + tbr = user->i.tbr; psr = user->i.psr; if (copy_from_user(user, &sc->sc_context, sizeof(sc->sc_context))) @@ -250,6 +253,8 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set) struct sigframe __user *frame; int rsig; + set_fs(USER_DS); + frame = get_sigframe(ka, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -293,22 +298,23 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set) (unsigned long) (frame->retcode + 2)); } - /* set up registers for signal handler */ - __frame->sp = (unsigned long) frame; - __frame->lr = (unsigned long) &frame->retcode; - __frame->gr8 = sig; - + /* Set up registers for the signal handler */ if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor __user *) ka->sa.sa_handler; - __get_user(__frame->pc, &funcptr->text); - __get_user(__frame->gr15, &funcptr->GOT); + struct fdpic_func_descriptor desc; + if (copy_from_user(&desc, funcptr, sizeof(desc))) + goto give_sigsegv; + __frame->pc = desc.text; + __frame->gr15 = desc.GOT; } else { __frame->pc = (unsigned long) ka->sa.sa_handler; __frame->gr15 = 0; } - set_fs(USER_DS); + __frame->sp = (unsigned long) frame; + __frame->lr = (unsigned long) &frame->retcode; + __frame->gr8 = sig; /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) @@ -323,7 +329,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set) return 0; give_sigsegv: - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); return -EFAULT; } /* end setup_frame() */ @@ -338,6 +344,8 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; int rsig; + set_fs(USER_DS); + frame = get_sigframe(ka, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) @@ -392,22 +400,23 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Set up registers for signal handler */ - __frame->sp = (unsigned long) frame; - __frame->lr = (unsigned long) &frame->retcode; - __frame->gr8 = sig; - __frame->gr9 = (unsigned long) &frame->info; - if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor __user *) ka->sa.sa_handler; - __get_user(__frame->pc, &funcptr->text); - __get_user(__frame->gr15, &funcptr->GOT); + struct fdpic_func_descriptor desc; + if (copy_from_user(&desc, funcptr, sizeof(desc))) + goto give_sigsegv; + __frame->pc = desc.text; + __frame->gr15 = desc.GOT; } else { __frame->pc = (unsigned long) ka->sa.sa_handler; __frame->gr15 = 0; } - set_fs(USER_DS); + __frame->sp = (unsigned long) frame; + __frame->lr = (unsigned long) &frame->retcode; + __frame->gr8 = sig; + __frame->gr9 = (unsigned long) &frame->info; /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) @@ -422,7 +431,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; give_sigsegv: - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); return -EFAULT; } /* end setup_rt_frame() */ @@ -437,7 +446,7 @@ static int handle_signal(unsigned long sig, siginfo_t *info, int ret; /* Are we from a system call? */ - if (in_syscall(__frame)) { + if (__frame->syscallno != -1) { /* If so, check system call restarting.. */ switch (__frame->gr8) { case -ERESTART_RESTARTBLOCK: @@ -456,6 +465,7 @@ static int handle_signal(unsigned long sig, siginfo_t *info, __frame->gr8 = __frame->orig_gr8; __frame->pc -= 4; } + __frame->syscallno = -1; } /* Set up the stack frame */ @@ -538,10 +548,11 @@ no_signal: break; case -ERESTART_RESTARTBLOCK: - __frame->gr8 = __NR_restart_syscall; + __frame->gr7 = __NR_restart_syscall; __frame->pc -= 4; break; } + __frame->syscallno = -1; } /* if there's no signal to deliver, we just put the saved sigmask diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index f4709756d0d..4ff2fb1e6b1 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile @@ -5,4 +5,4 @@ lib-y := \ __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ - outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o + outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e291c20..db4953dc4e1 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index d514cd9edb4..8fb7d33a661 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h @@ -6,12 +6,6 @@ * David Mosberger-Tang <davidm@hpl.hp.com> */ - -#include <linux/threads.h> -#include <linux/irq.h> - -#include <asm/processor.h> - /* * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. */ @@ -20,6 +14,11 @@ #define local_softirq_pending() (local_cpu_data->softirq_pending) +#include <linux/threads.h> +#include <linux/irq.h> + +#include <asm/processor.h> + extern void __iomem *ipi_base_addr; void ack_bad_irq(unsigned int irq); diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h index 9f342a574ce..dd028f2b13b 100644 --- a/arch/ia64/include/asm/system.h +++ b/arch/ia64/include/asm/system.h @@ -272,10 +272,6 @@ void cpu_idle_wait(void); void default_idle(void); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void account_system_vtime(struct task_struct *); -#endif - #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/arch/m32r/include/asm/elf.h b/arch/m32r/include/asm/elf.h index 2f85412ef73..b8da7d0574d 100644 --- a/arch/m32r/include/asm/elf.h +++ b/arch/m32r/include/asm/elf.h @@ -82,9 +82,9 @@ typedef elf_fpreg_t elf_fpregset_t; * These are used to set parameters in the core dumps. */ #define ELF_CLASS ELFCLASS32 -#if defined(__LITTLE_ENDIAN) +#if defined(__LITTLE_ENDIAN__) #define ELF_DATA ELFDATA2LSB -#elif defined(__BIG_ENDIAN) +#elif defined(__BIG_ENDIAN__) #define ELF_DATA ELFDATA2MSB #else #error no endian defined diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h index 9c1acb2b1a9..b2eeb0de1c8 100644 --- a/arch/m32r/include/asm/signal.h +++ b/arch/m32r/include/asm/signal.h @@ -157,7 +157,6 @@ typedef struct sigaltstack { #undef __HAVE_ARCH_SIG_BITOPS struct pt_regs; -extern int do_signal(struct pt_regs *regs, sigset_t *oldset); #define ptrace_signal_deliver(regs, cookie) do { } while (0) diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index 76125777483..c70545689da 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -351,6 +351,7 @@ #define __ARCH_WANT_SYS_OLD_GETRLIMIT /*will be unused*/ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __IGNORE_lchown #define __IGNORE_setuid diff --git a/arch/m32r/kernel/.gitignore b/arch/m32r/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/m32r/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S index 403869833b9..225412bc227 100644 --- a/arch/m32r/kernel/entry.S +++ b/arch/m32r/kernel/entry.S @@ -235,10 +235,9 @@ work_resched: work_notifysig: ; deal with pending signals and ; notify-resume requests mv r0, sp ; arg1 : struct pt_regs *regs - ldi r1, #0 ; arg2 : sigset_t *oldset - mv r2, r9 ; arg3 : __u32 thread_info_flags + mv r1, r9 ; arg2 : __u32 thread_info_flags bl do_notify_resume - bra restore_all + bra resume_userspace ; perform syscall exit tracing ALIGN diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index e555091eb97..0021ade4cba 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -592,16 +592,17 @@ void user_enable_single_step(struct task_struct *child) if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0) != sizeof(insn)) - break; + return -EIO; compute_next_pc(insn, pc, &next_pc, child); if (next_pc & 0x80000000) - break; + return -EIO; if (embed_debug_trap(child, next_pc)) - break; + return -EIO; invalidate_cache(); + return 0; } void user_disable_single_step(struct task_struct *child) diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 144b0f124fc..a08697f0886 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -30,35 +30,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -int do_signal(struct pt_regs *, sigset_t *); - -asmlinkage int -sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, - unsigned long r2, unsigned long r3, unsigned long r4, - unsigned long r5, unsigned long r6, struct pt_regs *regs) -{ - sigset_t newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - return -ERESTARTNOHAND; -} - asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r2, unsigned long r3, unsigned long r4, @@ -218,7 +189,7 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) return (void __user *)((sp - frame_size) & -8ul); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; @@ -275,22 +246,34 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->pc); #endif - return; + return 0; give_sigsegv: force_sigsegv(sig, current); + return -EFAULT; +} + +static int prev_insn(struct pt_regs *regs) +{ + u16 inst; + if (get_user(inst, (u16 __user *)(regs->bpc - 2))) + return -EFAULT; + if ((inst & 0xfff0) == 0x10f0) /* trap ? */ + regs->bpc -= 2; + else + regs->bpc -= 4; + regs->syscall_nr = -1; + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - unsigned short inst; - /* Are we from a system call? */ if (regs->syscall_nr >= 0) { /* If so, check system call restarting.. */ @@ -308,16 +291,14 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, /* fallthrough */ case -ERESTARTNOINTR: regs->r0 = regs->orig_r0; - inst = *(unsigned short *)(regs->bpc - 2); - if ((inst & 0xfff0) == 0x10f0) /* trap ? */ - regs->bpc -= 2; - else - regs->bpc -= 4; + if (prev_insn(regs) < 0) + return -EFAULT; } } /* Set up the stack frame */ - setup_rt_frame(sig, ka, info, oldset, regs); + if (setup_rt_frame(sig, ka, info, oldset, regs)) + return -EFAULT; spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); @@ -325,6 +306,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + return 0; } /* @@ -332,12 +314,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -int do_signal(struct pt_regs *regs, sigset_t *oldset) +static void do_signal(struct pt_regs *regs) { siginfo_t info; int signr; struct k_sigaction ka; - unsigned short inst; + sigset_t *oldset; /* * We want the common case to go fast, which @@ -346,12 +328,14 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * if so. */ if (!user_mode(regs)) - return 1; + return; if (try_to_freeze()) goto no_signal; - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else oldset = ¤t->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); @@ -363,8 +347,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) */ /* Whee! Actually deliver the signal. */ - handle_signal(signr, &ka, &info, oldset, regs); - return 1; + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) + clear_thread_flag(TIF_RESTORE_SIGMASK); + + return; } no_signal: @@ -375,31 +361,24 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) { regs->r0 = regs->orig_r0; - inst = *(unsigned short *)(regs->bpc - 2); - if ((inst & 0xfff0) == 0x10f0) /* trap ? */ - regs->bpc -= 2; - else - regs->bpc -= 4; - } - if (regs->r0 == -ERESTART_RESTARTBLOCK){ + prev_insn(regs); + } else if (regs->r0 == -ERESTART_RESTARTBLOCK){ regs->r0 = regs->orig_r0; regs->r7 = __NR_restart_syscall; - inst = *(unsigned short *)(regs->bpc - 2); - if ((inst & 0xfff0) == 0x10f0) /* trap ? */ - regs->bpc -= 2; - else - regs->bpc -= 4; + prev_insn(regs); } } - return 0; + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } } /* * notification of userspace execution resumption * - triggered by current->work.notify_resume */ -void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, - __u32 thread_info_flags) +void do_notify_resume(struct pt_regs *regs, __u32 thread_info_flags) { /* Pending single-step? */ if (thread_info_flags & _TIF_SINGLESTEP) @@ -407,7 +386,7 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs,oldset); + do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); diff --git a/arch/m68k/mac/macboing.c b/arch/m68k/mac/macboing.c index 8f0640847ad..05285d08e54 100644 --- a/arch/m68k/mac/macboing.c +++ b/arch/m68k/mac/macboing.c @@ -162,7 +162,7 @@ static void mac_init_asc( void ) void mac_mksound( unsigned int freq, unsigned int length ) { __u32 cfreq = ( freq << 5 ) / 468; - __u32 flags; + unsigned long flags; int i; if ( mac_special_bell == NULL ) @@ -224,7 +224,7 @@ static void mac_nosound( unsigned long ignored ) */ static void mac_quadra_start_bell( unsigned int freq, unsigned int length, unsigned int volume ) { - __u32 flags; + unsigned long flags; /* if the bell is already ringing, ring longer */ if ( mac_bell_duration > 0 ) @@ -271,7 +271,7 @@ static void mac_quadra_start_bell( unsigned int freq, unsigned int length, unsig static void mac_quadra_ring_bell( unsigned long ignored ) { int i, count = mac_asc_samplespersec / HZ; - __u32 flags; + unsigned long flags; /* * we neither want a sound buffer overflow nor underflow, so we need to match diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild index e322d65f33a..7dd65cfae83 100644 --- a/arch/mips/Kbuild +++ b/arch/mips/Kbuild @@ -7,6 +7,10 @@ subdir-ccflags-y := -Werror include arch/mips/Kbuild.platforms obj-y := $(platform-y) +# make clean traverses $(obj-) without having included .config, so +# everything ends up here +obj- := $(platform-) + # mips object files # The object files are linked as core-y files would be linked diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3ad59dde485..4c9f402295d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -13,6 +13,7 @@ config MIPS select HAVE_KPROBES select HAVE_KRETPROBES select RTC_LIB if !MACH_LOONGSON + select GENERIC_ATOMIC64 if !64BIT mainmenu "Linux/MIPS Kernel Configuration" @@ -880,11 +881,15 @@ config NO_IOPORT config GENERIC_ISA_DMA bool select ZONE_DMA if GENERIC_ISA_DMA_SUPPORT_BROKEN=n + select ISA_DMA_API config GENERIC_ISA_DMA_SUPPORT_BROKEN bool select GENERIC_ISA_DMA +config ISA_DMA_API + bool + config GENERIC_GPIO bool @@ -1646,8 +1651,16 @@ config MIPS_MT_SMP select SYS_SUPPORTS_SMP select SMP_UP help - This is a kernel model which is also known a VSMP or lately - has been marketesed into SMVP. + This is a kernel model which is known a VSMP but lately has been + marketesed into SMVP. + Virtual SMP uses the processor's VPEs to implement virtual + processors. In currently available configuration of the 34K processor + this allows for a dual processor. Both processors will share the same + primary caches; each will obtain the half of the TLB for it's own + exclusive use. For a layman this model can be described as similar to + what Intel calls Hyperthreading. + + For further information see http://www.linux-mips.org/wiki/34K#VSMP config MIPS_MT_SMTC bool "SMTC: Use all TCs on all VPEs for SMP" @@ -1664,6 +1677,14 @@ config MIPS_MT_SMTC help This is a kernel model which is known a SMTC or lately has been marketesed into SMVP. + is presenting the available TC's of the core as processors to Linux. + On currently available 34K processors this means a Linux system will + see up to 5 processors. The implementation of the SMTC kernel differs + significantly from VSMP and cannot efficiently coexist in the same + kernel binary so the choice between VSMP and SMTC is a compile time + decision. + + For further information see http://www.linux-mips.org/wiki/34K#SMTC endchoice diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c index c29511b11d4..53402105962 100644 --- a/arch/mips/alchemy/common/prom.c +++ b/arch/mips/alchemy/common/prom.c @@ -43,7 +43,7 @@ int prom_argc; char **prom_argv; char **prom_envp; -void prom_init_cmdline(void) +void __init prom_init_cmdline(void) { int i; @@ -104,7 +104,7 @@ static inline void str2eaddr(unsigned char *ea, unsigned char *str) } } -int prom_get_ethernet_addr(char *ethernet_addr) +int __init prom_get_ethernet_addr(char *ethernet_addr) { char *ethaddr_str; @@ -123,7 +123,6 @@ int prom_get_ethernet_addr(char *ethernet_addr) return 0; } -EXPORT_SYMBOL(prom_get_ethernet_addr); void __init prom_free_prom_memory(void) { diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index ed9bb709c9a..5042d51b051 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -59,7 +59,7 @@ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE hostprogs-y := calc_vmlinuz_load_addr VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ - $(objtree)/$(KBUILD_IMAGE) $(VMLINUX_LOAD_ADDRESS)) + $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) vmlinuzobjs-y += $(obj)/piggy.o @@ -105,4 +105,4 @@ OBJCOPYFLAGS_vmlinuz.srec := $(OBJCOPYFLAGS) -S -O srec vmlinuz.srec: vmlinuz $(call cmd,objcopy) -clean-files := $(objtree)/vmlinuz.* +clean-files := $(objtree)/vmlinuz $(objtree)/vmlinuz.{32,ecoff,bin,srec} diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index 094c17e38e1..47323ca452d 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -83,3 +83,7 @@ config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_STATIC depends on CPU_CAVIUM_OCTEON + +config CAVIUM_OCTEON_HELPER + def_bool y + depends on OCTEON_ETHERNET || PCI diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index c664c8cc2b4..a5b427909b5 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -41,7 +41,7 @@ static int cnmips_cu2_call(struct notifier_block *nfb, unsigned long action, return NOTIFY_OK; /* Let default notifier send signals */ } -static int cnmips_cu2_setup(void) +static int __init cnmips_cu2_setup(void) { return cu2_notifier(cnmips_cu2_call, 0); } diff --git a/arch/mips/cavium-octeon/executive/Makefile b/arch/mips/cavium-octeon/executive/Makefile index 2fd66db6939..7f41c5be219 100644 --- a/arch/mips/cavium-octeon/executive/Makefile +++ b/arch/mips/cavium-octeon/executive/Makefile @@ -11,4 +11,4 @@ obj-y += cvmx-bootmem.o cvmx-l2c.o cvmx-sysinfo.o octeon-model.o -obj-$(CONFIG_PCI) += cvmx-helper-errata.o cvmx-helper-jtag.o +obj-$(CONFIG_CAVIUM_OCTEON_HELPER) += cvmx-helper-errata.o cvmx-helper-jtag.o diff --git a/arch/mips/dec/Platform b/arch/mips/dec/Platform index 3adbcbd95db..cf55a6f4e72 100644 --- a/arch/mips/dec/Platform +++ b/arch/mips/dec/Platform @@ -1,7 +1,7 @@ # # DECstation family # -platform-$(CONFIG_MACH_DECSTATION) = dec/ +platform-$(CONFIG_MACH_DECSTATION) += dec/ cflags-$(CONFIG_MACH_DECSTATION) += \ -I$(srctree)/arch/mips/include/asm/mach-dec libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/ diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index c63c56bfd18..47d87da379f 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -782,6 +782,10 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) */ #define atomic64_add_negative(i, v) (atomic64_add_return(i, (v)) < 0) +#else /* !CONFIG_64BIT */ + +#include <asm-generic/atomic64.h> + #endif /* CONFIG_64BIT */ /* diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h index 2cb2f0c2c4f..3532e2c5f09 100644 --- a/arch/mips/include/asm/cop2.h +++ b/arch/mips/include/asm/cop2.h @@ -24,7 +24,7 @@ extern int cu2_notifier_call_chain(unsigned long val, void *v); #define cu2_notifier(fn, pri) \ ({ \ - static struct notifier_block fn##_nb __cpuinitdata = { \ + static struct notifier_block fn##_nb = { \ .notifier_call = fn, \ .priority = pri \ }; \ diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h index e482fe90fe8..75eddedcfc3 100644 --- a/arch/mips/include/asm/fcntl.h +++ b/arch/mips/include/asm/fcntl.h @@ -56,6 +56,7 @@ */ #ifdef CONFIG_32BIT +#include <linux/types.h> struct flock { short l_type; diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h index 9b9436a4d81..86548da650e 100644 --- a/arch/mips/include/asm/gic.h +++ b/arch/mips/include/asm/gic.h @@ -321,6 +321,7 @@ struct gic_intrmask_regs { */ struct gic_intr_map { unsigned int cpunum; /* Directed to this CPU */ +#define GIC_UNUSED 0xdead /* Dummy data */ unsigned int pin; /* Directed to this Pin */ unsigned int polarity; /* Polarity : +/- */ unsigned int trigtype; /* Trigger : Edge/Levl */ diff --git a/arch/mips/include/asm/mach-tx49xx/kmalloc.h b/arch/mips/include/asm/mach-tx49xx/kmalloc.h index b74caf65482..ff9a8b86cb9 100644 --- a/arch/mips/include/asm/mach-tx49xx/kmalloc.h +++ b/arch/mips/include/asm/mach-tx49xx/kmalloc.h @@ -1,6 +1,6 @@ #ifndef __ASM_MACH_TX49XX_KMALLOC_H #define __ASM_MACH_TX49XX_KMALLOC_H -#define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES #endif /* __ASM_MACH_TX49XX_KMALLOC_H */ diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h index cea872fc6f5..d11aa02a956 100644 --- a/arch/mips/include/asm/mips-boards/maltaint.h +++ b/arch/mips/include/asm/mips-boards/maltaint.h @@ -88,9 +88,6 @@ #define GIC_EXT_INTR(x) x -/* Dummy data */ -#define X 0xdead - /* External Interrupts used for IPI */ #define GIC_IPI_EXT_INTR_RESCHED_VPE0 16 #define GIC_IPI_EXT_INTR_CALLFNC_VPE0 17 diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index a16beafcea9..e59cd1ac09c 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -150,6 +150,20 @@ typedef struct { unsigned long pgprot; } pgprot_t; ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET) #endif #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) + +/* + * RELOC_HIDE was originally added by 6007b903dfe5f1d13e0c711ac2894bdd4a61b1ad + * (lmo) rsp. 8431fd094d625b94d364fe393076ccef88e6ce18 (kernel.org). The + * discussion can be found in lkml posting + * <a2ebde260608230500o3407b108hc03debb9da6e62c@mail.gmail.com> which is + * archived at http://lists.linuxcoding.com/kernel/2006-q3/msg17360.html + * + * It is unclear if the misscompilations mentioned in + * http://lkml.org/lkml/2010/8/8/138 also affect MIPS so we keep this one + * until GCC 3.x has been retired before we can apply + * https://patchwork.linux-mips.org/patch/1541/ + */ + #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) diff --git a/arch/mips/include/asm/siginfo.h b/arch/mips/include/asm/siginfo.h index 96e28f18dad..1ca64b4d33d 100644 --- a/arch/mips/include/asm/siginfo.h +++ b/arch/mips/include/asm/siginfo.h @@ -88,6 +88,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif + short _addr_lsb; } _sigfault; /* SIGPOLL, SIGXFSZ (To do ...) */ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 2376f2e06e4..70df9c0d3c5 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -146,7 +146,8 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH) /* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP) +#define _TIF_WORK_MASK (0x0000ffef & \ + ~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT)) /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (0x8000ffff & ~_TIF_SECCOMP) diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index baa318a59c9..550725b881d 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -356,16 +356,19 @@ #define __NR_perf_event_open (__NR_Linux + 333) #define __NR_accept4 (__NR_Linux + 334) #define __NR_recvmmsg (__NR_Linux + 335) +#define __NR_fanotify_init (__NR_Linux + 336) +#define __NR_fanotify_mark (__NR_Linux + 337) +#define __NR_prlimit64 (__NR_Linux + 338) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 335 +#define __NR_Linux_syscalls 338 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 335 +#define __NR_O32_Linux_syscalls 338 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -668,16 +671,19 @@ #define __NR_perf_event_open (__NR_Linux + 292) #define __NR_accept4 (__NR_Linux + 293) #define __NR_recvmmsg (__NR_Linux + 294) +#define __NR_fanotify_init (__NR_Linux + 295) +#define __NR_fanotify_mark (__NR_Linux + 296) +#define __NR_prlimit64 (__NR_Linux + 297) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 294 +#define __NR_Linux_syscalls 297 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 294 +#define __NR_64_Linux_syscalls 297 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -985,16 +991,19 @@ #define __NR_accept4 (__NR_Linux + 297) #define __NR_recvmmsg (__NR_Linux + 298) #define __NR_getdents64 (__NR_Linux + 299) +#define __NR_fanotify_init (__NR_Linux + 300) +#define __NR_fanotify_mark (__NR_Linux + 301) +#define __NR_prlimit64 (__NR_Linux + 302) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 299 +#define __NR_Linux_syscalls 302 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 299 +#define __NR_N32_Linux_syscalls 302 #ifdef __KERNEL__ diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform index 6a97230e3d0..ba91be9c21e 100644 --- a/arch/mips/jz4740/Platform +++ b/arch/mips/jz4740/Platform @@ -1,3 +1,3 @@ -core-$(CONFIG_MACH_JZ4740) += arch/mips/jz4740/ +platform-$(CONFIG_MACH_JZ4740) += jz4740/ cflags-$(CONFIG_MACH_JZ4740) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 load-$(CONFIG_MACH_JZ4740) += 0xffffffff80010000 diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 0176ed015c8..32103cc2a25 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -40,7 +40,6 @@ int __compute_return_epc(struct pt_regs *regs) return -EFAULT; } - regs->regs[0] = 0; switch (insn.i_format.opcode) { /* * jr and jalr are in r_format format. diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index b181f2f0ea8..82ba9f62f49 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -7,7 +7,6 @@ #include <asm/io.h> #include <asm/gic.h> #include <asm/gcmpregs.h> -#include <asm/mips-boards/maltaint.h> #include <asm/irq.h> #include <linux/hardirq.h> #include <asm-generic/bitops/find.h> @@ -131,7 +130,7 @@ static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) int i; irq -= _irqbase; - pr_debug(KERN_DEBUG "%s(%d) called\n", __func__, irq); + pr_debug("%s(%d) called\n", __func__, irq); cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) return -1; @@ -222,7 +221,7 @@ static void __init gic_basic_init(int numintrs, int numvpes, /* Setup specifics */ for (i = 0; i < mapsize; i++) { cpu = intrmap[i].cpunum; - if (cpu == X) + if (cpu == GIC_UNUSED) continue; if (cpu == 0 && i != 0 && intrmap[i].flags == 0) continue; diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 1f4e2fa6414..f4546e97c60 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -283,7 +283,7 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, struct pt_regs *regs = args->regs; int trap = (regs->cp0_cause & 0x7c) >> 2; - /* Userpace events, ignore. */ + /* Userspace events, ignore. */ if (user_mode(regs)) return NOTIFY_DONE; diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index 80e2ba694ba..29811f04339 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -251,7 +251,7 @@ void sp_work_handle_request(void) memset(&tz, 0, sizeof(tz)); if ((ret.retval = sp_syscall(__NR_gettimeofday, (int)&tv, (int)&tz, 0, 0)) == 0) - ret.retval = tv.tv_sec; + ret.retval = tv.tv_sec; break; case MTSP_SYSCALL_EXIT: diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index c2dab140dc9..6343b4a5b83 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -341,3 +341,10 @@ asmlinkage long sys32_lookup_dcookie(u32 a0, u32 a1, char __user *buf, { return sys_lookup_dcookie(merge_64(a0, a1), buf, len); } + +SYSCALL_DEFINE6(32_fanotify_mark, int, fanotify_fd, unsigned int, flags, + u64, a3, u64, a4, int, dfd, const char __user *, pathname) +{ + return sys_fanotify_mark(fanotify_fd, flags, merge_64(a3, a4), + dfd, pathname); +} diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 2340f11dc29..9a526ba6f25 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -103,7 +103,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; - retval = security_task_setscheduler(p, 0, NULL); + retval = security_task_setscheduler(p) if (retval) goto out_unlock; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c51b95ff864..c8777333e19 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -536,7 +536,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) { /* do the secure computing check first */ if (!entryexit) - secure_computing(regs->regs[0]); + secure_computing(regs->regs[2]); if (unlikely(current->audit_context) && entryexit) audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]), @@ -565,7 +565,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) out: if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(audit_arch(), regs->regs[0], + audit_syscall_entry(audit_arch(), regs->regs[2], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); } diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 17202bbe843..fbaabad0e6e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -63,9 +63,9 @@ stack_done: sw t0, PT_R7(sp) # set error flag beqz t0, 1f + lw t1, PT_R2(sp) # syscall number negu v0 # error - sw v0, PT_R0(sp) # set flag for syscall - # restarting + sw t1, PT_R0(sp) # save it for syscall restarting 1: sw v0, PT_R2(sp) # result o32_syscall_exit: @@ -104,9 +104,9 @@ syscall_trace_entry: sw t0, PT_R7(sp) # set error flag beqz t0, 1f + lw t1, PT_R2(sp) # syscall number negu v0 # error - sw v0, PT_R0(sp) # set flag for syscall - # restarting + sw t1, PT_R0(sp) # save it for syscall restarting 1: sw v0, PT_R2(sp) # result j syscall_exit @@ -169,8 +169,7 @@ stackargs: * We probably should handle this case a bit more drastic. */ bad_stack: - negu v0 # error - sw v0, PT_R0(sp) + li v0, EFAULT sw v0, PT_R2(sp) li t0, 1 # set error flag sw t0, PT_R7(sp) @@ -583,7 +582,10 @@ einval: li v0, -ENOSYS sys sys_rt_tgsigqueueinfo 4 sys sys_perf_event_open 5 sys sys_accept4 4 - sys sys_recvmmsg 5 + sys sys_recvmmsg 5 /* 4335 */ + sys sys_fanotify_init 2 + sys sys_fanotify_mark 6 + sys sys_prlimit64 4 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index a8a6c596eb0..3f417928320 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -66,9 +66,9 @@ NESTED(handle_sys64, PT_SIZE, sp) sd t0, PT_R7(sp) # set error flag beqz t0, 1f + ld t1, PT_R2(sp) # syscall number dnegu v0 # error - sd v0, PT_R0(sp) # set flag for syscall - # restarting + sd t1, PT_R0(sp) # save it for syscall restarting 1: sd v0, PT_R2(sp) # result n64_syscall_exit: @@ -109,8 +109,9 @@ syscall_trace_entry: sd t0, PT_R7(sp) # set error flag beqz t0, 1f + ld t1, PT_R2(sp) # syscall number dnegu v0 # error - sd v0, PT_R0(sp) # set flag for syscall restarting + sd t1, PT_R0(sp) # save it for syscall restarting 1: sd v0, PT_R2(sp) # result j syscall_exit @@ -416,9 +417,12 @@ sys_call_table: PTR sys_pipe2 PTR sys_inotify_init1 PTR sys_preadv - PTR sys_pwritev /* 5390 */ + PTR sys_pwritev /* 5290 */ PTR sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 - PTR sys_recvmmsg + PTR sys_recvmmsg + PTR sys_fanotify_init /* 5295 */ + PTR sys_fanotify_mark + PTR sys_prlimit64 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index a3d66137731..f08ece6d8ac 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -65,8 +65,9 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd t0, PT_R7(sp) # set error flag beqz t0, 1f + ld t1, PT_R2(sp) # syscall number dnegu v0 # error - sd v0, PT_R0(sp) # set flag for syscall restarting + sd t1, PT_R0(sp) # save it for syscall restarting 1: sd v0, PT_R2(sp) # result local_irq_disable # make sure need_resched and @@ -106,8 +107,9 @@ n32_syscall_trace_entry: sd t0, PT_R7(sp) # set error flag beqz t0, 1f + ld t1, PT_R2(sp) # syscall number dnegu v0 # error - sd v0, PT_R0(sp) # set flag for syscall restarting + sd t1, PT_R0(sp) # save it for syscall restarting 1: sd v0, PT_R2(sp) # result j syscall_exit @@ -320,10 +322,10 @@ EXPORT(sysn32_call_table) PTR sys_cacheflush PTR sys_cachectl PTR sys_sysmips - PTR sys_io_setup /* 6200 */ + PTR compat_sys_io_setup /* 6200 */ PTR sys_io_destroy - PTR sys_io_getevents - PTR sys_io_submit + PTR compat_sys_io_getevents + PTR compat_sys_io_submit PTR sys_io_cancel PTR sys_exit_group /* 6205 */ PTR sys_lookup_dcookie @@ -419,5 +421,8 @@ EXPORT(sysn32_call_table) PTR sys_perf_event_open PTR sys_accept4 PTR compat_sys_recvmmsg - PTR sys_getdents + PTR sys_getdents64 + PTR sys_fanotify_init /* 6300 */ + PTR sys_fanotify_mark + PTR sys_prlimit64 .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 813689ef238..78d768a3e19 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -93,8 +93,9 @@ NESTED(handle_sys, PT_SIZE, sp) sd t0, PT_R7(sp) # set error flag beqz t0, 1f + ld t1, PT_R2(sp) # syscall number dnegu v0 # error - sd v0, PT_R0(sp) # flag for syscall restarting + sd t1, PT_R0(sp) # save it for syscall restarting 1: sd v0, PT_R2(sp) # result o32_syscall_exit: @@ -142,8 +143,9 @@ trace_a_syscall: sd t0, PT_R7(sp) # set error flag beqz t0, 1f + ld t1, PT_R2(sp) # syscall number dnegu v0 # error - sd v0, PT_R0(sp) # set flag for syscall restarting + sd t1, PT_R0(sp) # save it for syscall restarting 1: sd v0, PT_R2(sp) # result j syscall_exit @@ -154,8 +156,7 @@ trace_a_syscall: * The stackpointer for a call with more than 4 arguments is bad. */ bad_stack: - dnegu v0 # error - sd v0, PT_R0(sp) + li v0, EFAULT sd v0, PT_R2(sp) li t0, 1 # set error flag sd t0, PT_R7(sp) @@ -444,10 +445,10 @@ sys_call_table: PTR compat_sys_futex PTR compat_sys_sched_setaffinity PTR compat_sys_sched_getaffinity /* 4240 */ - PTR sys_io_setup + PTR compat_sys_io_setup PTR sys_io_destroy - PTR sys_io_getevents - PTR sys_io_submit + PTR compat_sys_io_getevents + PTR compat_sys_io_submit PTR sys_io_cancel /* 4245 */ PTR sys_exit_group PTR sys32_lookup_dcookie @@ -538,5 +539,8 @@ sys_call_table: PTR compat_sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 - PTR compat_sys_recvmmsg + PTR compat_sys_recvmmsg /* 4335 */ + PTR sys_fanotify_init + PTR sys_32_fanotify_mark + PTR sys_prlimit64 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 2099d5a4c4b..5922342bca3 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -390,7 +390,6 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs) { struct rt_sigframe __user *frame; sigset_t set; - stack_t st; int sig; frame = (struct rt_sigframe __user *) regs.regs[29]; @@ -411,11 +410,9 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs) else if (sig) force_sig(sig, current); - if (__copy_from_user(&st, &frame->rs_uc.uc_stack, sizeof(st))) - goto badframe; /* It is more difficult to avoid calling this function than to call it and ignore errors. */ - do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); + do_sigaltstack(&frame->rs_uc.uc_stack, NULL, regs.regs[29]); /* * Don't let your children do this ... @@ -550,23 +547,26 @@ static int handle_signal(unsigned long sig, siginfo_t *info, struct mips_abi *abi = current->thread.abi; void *vdso = current->mm->context.vdso; - switch(regs->regs[0]) { - case ERESTART_RESTARTBLOCK: - case ERESTARTNOHAND: - regs->regs[2] = EINTR; - break; - case ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { + if (regs->regs[0]) { + switch(regs->regs[2]) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: regs->regs[2] = EINTR; break; + case ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->regs[2] = EINTR; + break; + } + /* fallthrough */ + case ERESTARTNOINTR: + regs->regs[7] = regs->regs[26]; + regs->regs[2] = regs->regs[0]; + regs->cp0_epc -= 4; } - /* fallthrough */ - case ERESTARTNOINTR: /* Userland will reload $v0. */ - regs->regs[7] = regs->regs[26]; - regs->cp0_epc -= 8; - } - regs->regs[0] = 0; /* Don't deal with this again. */ + regs->regs[0] = 0; /* Don't deal with this again. */ + } if (sig_uses_siginfo(ka)) ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset, @@ -575,6 +575,9 @@ static int handle_signal(unsigned long sig, siginfo_t *info, ret = abi->setup_frame(vdso + abi->signal_return_offset, ka, regs, sig, oldset); + if (ret) + return ret; + spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) @@ -622,17 +625,13 @@ static void do_signal(struct pt_regs *regs) return; } - /* - * Who's code doesn't conform to the restartable syscall convention - * dies here!!! The li instruction, a single machine instruction, - * must directly be followed by the syscall instruction. - */ if (regs->regs[0]) { if (regs->regs[2] == ERESTARTNOHAND || regs->regs[2] == ERESTARTSYS || regs->regs[2] == ERESTARTNOINTR) { + regs->regs[2] = regs->regs[0]; regs->regs[7] = regs->regs[26]; - regs->cp0_epc -= 8; + regs->cp0_epc -= 4; } if (regs->regs[2] == ERESTART_RESTARTBLOCK) { regs->regs[2] = current->thread.abi->restart; diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 2c5df818c65..ee24d814d5b 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -109,6 +109,7 @@ asmlinkage int sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs) asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) { struct rt_sigframe_n32 __user *frame; + mm_segment_t old_fs; sigset_t set; stack_t st; s32 sp; @@ -143,7 +144,11 @@ asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs) /* It is more difficult to avoid calling this function than to call it and ignore errors. */ + old_fs = get_fs(); + set_fs(KERNEL_DS); do_sigaltstack((stack_t __user *)&st, NULL, regs.regs[29]); + set_fs(old_fs); + /* * Don't let your children do this ... diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 69b039ca8d8..33d5a5ce4a2 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -109,8 +109,6 @@ static void emulate_load_store_insn(struct pt_regs *regs, unsigned long value; unsigned int res; - regs->regs[0] = 0; - /* * This load never faults. */ diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 7ba890860d9..469d4019f79 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -44,27 +44,39 @@ static inline int cpu_is_noncoherent_r10000(struct device *dev) static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) { + gfp_t dma_flag; + /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); -#ifdef CONFIG_ZONE_DMA +#ifdef CONFIG_ISA if (dev == NULL) - gfp |= __GFP_DMA; - else if (dev->coherent_dma_mask < DMA_BIT_MASK(24)) - gfp |= __GFP_DMA; + dma_flag = __GFP_DMA; else #endif -#ifdef CONFIG_ZONE_DMA32 +#if defined(CONFIG_ZONE_DMA32) && defined(CONFIG_ZONE_DMA) if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) - gfp |= __GFP_DMA32; + dma_flag = __GFP_DMA; + else if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA32; + else +#endif +#if defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_ZONE_DMA) + if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA32; + else +#endif +#if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) + if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA; else #endif - ; + dma_flag = 0; /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; - return gfp; + return gfp | dma_flag; } void *dma_alloc_noncoherent(struct device *dev, size_t size, diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c index 1ef75cd80a0..274af3be144 100644 --- a/arch/mips/mm/sc-rm7k.c +++ b/arch/mips/mm/sc-rm7k.c @@ -30,7 +30,7 @@ #define tc_lsize 32 extern unsigned long icache_way_size, dcache_way_size; -unsigned long tcache_size; +static unsigned long tcache_size; #include <asm/r4kcache.h> diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 15949b0be81..b79b24afe3a 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -385,6 +385,8 @@ static int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap); */ #define GIC_CPU_NMI GIC_MAP_TO_NMI_MSK +#define X GIC_UNUSED + static struct gic_intr_map gic_intr_map[GIC_NUM_INTRS] = { { X, X, X, X, 0 }, { X, X, X, X, 0 }, @@ -404,6 +406,7 @@ static struct gic_intr_map gic_intr_map[GIC_NUM_INTRS] = { { X, X, X, X, 0 }, /* The remainder of this table is initialised by fill_ipi_map */ }; +#undef X /* * GCMP needs to be detected before any SMP initialisation diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c index 71f7d27b0d4..f31218e17d3 100644 --- a/arch/mips/pci/pci-rc32434.c +++ b/arch/mips/pci/pci-rc32434.c @@ -118,7 +118,7 @@ static int __init rc32434_pcibridge_init(void) if (!((pcicvalue == PCIM_H_EA) || (pcicvalue == PCIM_H_IA_FIX) || (pcicvalue == PCIM_H_IA_RR))) { - pr_err(KERN_ERR "PCI init error!!!\n"); + pr_err("PCI init error!!!\n"); /* Not in Host Mode, return ERROR */ return -1; } diff --git a/arch/mips/pnx8550/common/reset.c b/arch/mips/pnx8550/common/reset.c index fadd8744a6b..e7a12ff304b 100644 --- a/arch/mips/pnx8550/common/reset.c +++ b/arch/mips/pnx8550/common/reset.c @@ -22,29 +22,19 @@ */ #include <linux/kernel.h> +#include <asm/processor.h> #include <asm/reboot.h> #include <glb.h> void pnx8550_machine_restart(char *command) { - char head[] = "************* Machine restart *************"; - char foot[] = "*******************************************"; - - printk("\n\n"); - printk("%s\n", head); - if (command != NULL) - printk("* %s\n", command); - printk("%s\n", foot); - PNX8550_RST_CTL = PNX8550_RST_DO_SW_RST; } void pnx8550_machine_halt(void) { - printk("*** Machine halt. (Not implemented) ***\n"); -} - -void pnx8550_machine_power_off(void) -{ - printk("*** Machine power off. (Not implemented) ***\n"); + while (1) { + if (cpu_wait) + cpu_wait(); + } } diff --git a/arch/mips/pnx8550/common/setup.c b/arch/mips/pnx8550/common/setup.c index 64246c9c875..43cb3945fdb 100644 --- a/arch/mips/pnx8550/common/setup.c +++ b/arch/mips/pnx8550/common/setup.c @@ -44,7 +44,6 @@ extern void __init board_setup(void); extern void pnx8550_machine_restart(char *); extern void pnx8550_machine_halt(void); -extern void pnx8550_machine_power_off(void); extern struct resource ioport_resource; extern struct resource iomem_resource; extern char *prom_getcmdline(void); @@ -100,7 +99,7 @@ void __init plat_mem_setup(void) _machine_restart = pnx8550_machine_restart; _machine_halt = pnx8550_machine_halt; - pm_power_off = pnx8550_machine_power_off; + pm_power_off = pnx8550_machine_halt; /* Clear the Global 2 Register, PCI Inta Output Enable Registers Bit 1:Enable DAC Powerdown diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 444b9f918fd..7c2a2f7f8dc 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,7 +8,6 @@ mainmenu "Linux Kernel Configuration" config MN10300 def_bool y select HAVE_OPROFILE - select HAVE_ARCH_TRACEHOOK config AM33 def_bool y diff --git a/arch/mn10300/Kconfig.debug b/arch/mn10300/Kconfig.debug index ff80e86b9bd..ce83c74b3fd 100644 --- a/arch/mn10300/Kconfig.debug +++ b/arch/mn10300/Kconfig.debug @@ -101,7 +101,7 @@ config GDBSTUB_DEBUG_BREAKPOINT choice prompt "GDB stub port" - default GDBSTUB_TTYSM0 + default GDBSTUB_ON_TTYSM0 depends on GDBSTUB help Select the serial port used for GDB-stub. diff --git a/arch/mn10300/include/asm/bitops.h b/arch/mn10300/include/asm/bitops.h index f49ac49e09a..3f50e966107 100644 --- a/arch/mn10300/include/asm/bitops.h +++ b/arch/mn10300/include/asm/bitops.h @@ -229,9 +229,9 @@ int ffs(int x) #include <asm-generic/bitops/hweight.h> #define ext2_set_bit_atomic(lock, nr, addr) \ - test_and_set_bit((nr) ^ 0x18, (addr)) + test_and_set_bit((nr), (addr)) #define ext2_clear_bit_atomic(lock, nr, addr) \ - test_and_clear_bit((nr) ^ 0x18, (addr)) + test_and_clear_bit((nr), (addr)) #include <asm-generic/bitops/ext2-non-atomic.h> #include <asm-generic/bitops/minix-le.h> diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h index 7e891fce237..1865d72a86f 100644 --- a/arch/mn10300/include/asm/signal.h +++ b/arch/mn10300/include/asm/signal.h @@ -78,7 +78,7 @@ typedef unsigned long sigset_t; /* These should not be considered constants from userland. */ #define SIGRTMIN 32 -#define SIGRTMAX (_NSIG-1) +#define SIGRTMAX _NSIG /* * SA_FLAGS values: diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd7699..196a111e2e2 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 717db14c2cc..d4de05ab786 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -65,10 +65,10 @@ asmlinkage long sys_sigaction(int sig, old_sigset_t mask; if (verify_area(VERIFY_READ, act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); } @@ -77,10 +77,10 @@ asmlinkage long sys_sigaction(int sig, if (!ret && oact) { if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; @@ -102,6 +102,9 @@ static int restore_sigcontext(struct pt_regs *regs, { unsigned int err = 0; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + if (is_using_fpu(current)) fpu_kill_state(current); @@ -330,8 +333,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, regs->d0 = sig; regs->d1 = (unsigned long) &frame->sc; - set_fs(USER_DS); - /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -345,7 +346,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, return 0; give_sigsegv: - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); return -EFAULT; } @@ -413,8 +414,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->d0 = sig; regs->d1 = (long) &frame->info; - set_fs(USER_DS); - /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -428,10 +427,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; give_sigsegv: - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); return -EFAULT; } +static inline void stepback(struct pt_regs *regs) +{ + regs->pc -= 2; + regs->orig_d0 = -1; +} + /* * handle the actual delivery of a signal to userspace */ @@ -459,7 +464,7 @@ static int handle_signal(int sig, /* fallthrough */ case -ERESTARTNOINTR: regs->d0 = regs->orig_d0; - regs->pc -= 2; + stepback(regs); } } @@ -527,12 +532,12 @@ static void do_signal(struct pt_regs *regs) case -ERESTARTSYS: case -ERESTARTNOINTR: regs->d0 = regs->orig_d0; - regs->pc -= 2; + stepback(regs); break; case -ERESTART_RESTARTBLOCK: regs->d0 = __NR_restart_syscall; - regs->pc -= 2; + stepback(regs); break; } } diff --git a/arch/mn10300/mm/Makefile b/arch/mn10300/mm/Makefile index 28b9d983db0..1557277fbc5 100644 --- a/arch/mn10300/mm/Makefile +++ b/arch/mn10300/mm/Makefile @@ -2,13 +2,11 @@ # Makefile for the MN10300-specific memory management code # +cacheflush-y := cache.o cache-mn10300.o +cacheflush-$(CONFIG_MN10300_CACHE_WBACK) += cache-flush-mn10300.o + +cacheflush-$(CONFIG_MN10300_CACHE_DISABLED) := cache-disabled.o + obj-y := \ init.o fault.o pgtable.o extable.o tlb-mn10300.o mmu-context.o \ - misalignment.o dma-alloc.o - -ifneq ($(CONFIG_MN10300_CACHE_DISABLED),y) -obj-y += cache.o cache-mn10300.o -ifeq ($(CONFIG_MN10300_CACHE_WBACK),y) -obj-y += cache-flush-mn10300.o -endif -endif + misalignment.o dma-alloc.o $(cacheflush-y) diff --git a/arch/frv/lib/perf_event.c b/arch/mn10300/mm/cache-disabled.c index 9ac5acfd2e9..f669ea42aba 100644 --- a/arch/frv/lib/perf_event.c +++ b/arch/mn10300/mm/cache-disabled.c @@ -1,6 +1,6 @@ -/* Performance event handling +/* Handle the cache being disabled * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -8,12 +8,14 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ - -#include <linux/perf_event.h> +#include <linux/mm.h> /* - * mark the performance event as pending + * allow userspace to flush the instruction cache */ -void set_perf_event_pending(void) +asmlinkage long sys_cacheflush(unsigned long start, unsigned long end) { + if (end < start) + return -EINVAL; + return 0; } diff --git a/arch/mn10300/mm/cache.c b/arch/mn10300/mm/cache.c index 1b76719ec1c..9261217e8d2 100644 --- a/arch/mn10300/mm/cache.c +++ b/arch/mn10300/mm/cache.c @@ -54,13 +54,30 @@ EXPORT_SYMBOL(flush_icache_page); void flush_icache_range(unsigned long start, unsigned long end) { #ifdef CONFIG_MN10300_CACHE_WBACK - unsigned long addr, size, off; + unsigned long addr, size, base, off; struct page *page; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *ppte, pte; + if (end > 0x80000000UL) { + /* addresses above 0xa0000000 do not go through the cache */ + if (end > 0xa0000000UL) { + end = 0xa0000000UL; + if (start >= end) + return; + } + + /* kernel addresses between 0x80000000 and 0x9fffffff do not + * require page tables, so we just map such addresses directly */ + base = (start >= 0x80000000UL) ? start : 0x80000000UL; + mn10300_dcache_flush_range(base, end); + if (base == start) + goto invalidate; + end = base; + } + for (; start < end; start += size) { /* work out how much of the page to flush */ off = start & (PAGE_SIZE - 1); @@ -104,6 +121,7 @@ void flush_icache_range(unsigned long start, unsigned long end) } #endif +invalidate: mn10300_icache_inv(); } EXPORT_SYMBOL(flush_icache_range); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 907417d187e..79a04a9394d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,6 +16,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT help diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h index cc146427d8f..1e0fd8ba6c0 100644 --- a/arch/parisc/include/asm/perf_event.h +++ b/arch/parisc/include/asm/perf_event.h @@ -1,7 +1,6 @@ #ifndef __ASM_PARISC_PERF_EVENT_H #define __ASM_PARISC_PERF_EVENT_H -/* parisc only supports software events through this interface. */ -static inline void set_perf_event_pending(void) { } +/* Empty, just to avoid compiling error */ #endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b81e90..6e81bb596e5 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 631e5a0fb6a..4b1e521d966 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,6 +138,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7fa..9b287fdd8ea 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -129,7 +129,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_event_pending; /* PM interrupt while soft-disabled */ + u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac84..9c3d160670b 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void account_system_vtime(struct task_struct *); -#endif - extern struct dentry *powerpc_debugfs_root; #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e014..49cee9df225 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -63,11 +63,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; - int err; - - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); @@ -101,5 +96,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298..d05ae4204bb 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e..3129c855933 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ void hw_perf_enable(void) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -769,7 +777,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -777,14 +785,14 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. */ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (!event->hw.idx || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * XXX we should check if the task is an idle task. */ flags = 0; - if (event->ctx->task) + if (event->attach_state & PERF_ATTACH_TASK) flags |= PPMU_ONLY_COUNT_RUN; /* @@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, + .event_init = power_pmu_event_init, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, + .read = power_pmu_read, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* @@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae4..7ecca59ddf7 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +/* context locked on entry */ +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_pmu_enable(event->pmu); return ret; } -/* perf must be disabled, context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +/* context locked on entry */ +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. - */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + local_irq_save(flags); - perf_disable(); - fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + fsl_emb_pmu_read(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} /* * Release the PMU if this is the last perf_event. @@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, + .event_init = fsl_emb_pmu_event_init, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, + .read = fsl_emb_pmu_read, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) @@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 7109f5b1baa..2300426e531 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -138,6 +138,7 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) ti->local_flags &= ~_TLF_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } + regs->trap = 0; return 0; /* no signals delivered */ } @@ -164,6 +165,7 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs) ret = handle_rt_signal64(signr, &ka, &info, oldset, regs); } + regs->trap = 0; if (ret) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 266610119f6..b96a3a010c2 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -511,6 +511,7 @@ static long restore_user_regs(struct pt_regs *regs, if (!sig) save_r2 = (unsigned int)regs->gpr[2]; err = restore_general_regs(regs, sr); + regs->trap = 0; err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); if (!sig) regs->gpr[2] = (unsigned long) save_r2; @@ -884,7 +885,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; - regs->trap = 0; return 1; badframe: @@ -1228,7 +1228,6 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; - regs->trap = 0; return 1; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2fe6fc64b61..27c4a4584f8 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -178,7 +178,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); /* skip SOFTE */ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); + regs->trap = 0; err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5..54888eb10c3 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include <linux/posix-timers.h> #include <linux/irq.h> #include <linux/delay.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <asm/trace.h> #include <asm/io.h> @@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_event_pending(void) +static inline unsigned long test_irq_work_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_event_pending))); + : "i" (offsetof(struct paca_struct, irq_work_pending))); return x; } -static inline void set_perf_event_pending_flag(void) +static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } -static inline void clear_perf_event_pending(void) +static inline void clear_irq_work_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } #else /* 32-bit */ -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ -void set_perf_event_pending(void) +void set_irq_work_pending(void) { preempt_disable(); - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ /* * For iSeries shared processors, we have to let the hypervisor @@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); } #ifdef CONFIG_PPC_ISERIES diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index 5b243bd3eb3..3dc2a8d262b 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -57,7 +57,7 @@ static struct clk *mpc5121_clk_get(struct device *dev, const char *id) int id_match = 0; if (dev == NULL || id == NULL) - return NULL; + return clk; mutex_lock(&clocks_mutex); list_for_each_entry(p, &clocks, node) { diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 45c0cb9b67e..18c10482019 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -99,7 +99,7 @@ static void __init efika_pcisetup(void) if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't get bus-range for %s\n", pcictrl->full_name); - return; + goto out_put; } if (bus_range[1] == bus_range[0]) @@ -111,12 +111,12 @@ static void __init efika_pcisetup(void) printk(" controlled by %s\n", pcictrl->full_name); printk("\n"); - hose = pcibios_alloc_controller(of_node_get(pcictrl)); + hose = pcibios_alloc_controller(pcictrl); if (!hose) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't allocate PCI controller structure for %s\n", pcictrl->full_name); - return; + goto out_put; } hose->first_busno = bus_range[0]; @@ -124,6 +124,9 @@ static void __init efika_pcisetup(void) hose->ops = &rtas_pci_ops; pci_process_bridge_OF_ranges(hose, pcictrl, 0); + return; +out_put: + of_node_put(pcictrl); } #else diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 6e905314ad5..41f3a7eda1d 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -325,12 +325,16 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number) clrbits32(&simple_gpio->simple_dvo, sync | out); clrbits8(&wkup_gpio->wkup_dvo, reset); - /* wait at lease 1 us */ - udelay(2); + /* wait for 1 us */ + udelay(1); /* Deassert reset */ setbits8(&wkup_gpio->wkup_dvo, reset); + /* wait at least 200ns */ + /* 7 ~= (200ns * timebase) / ns2sec */ + __delay(7); + /* Restore pin-muxing */ out_be32(&simple_gpio->port_config, mux); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f0777a47e3a..75976a14194 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,6 +95,7 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 @@ -198,6 +199,13 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_BOOK + bool "Book scheduler support" + depends on SMP + help + Book scheduler support improves the CPU scheduler's decision making + when dealing with machines that have several books. + config MATHEMU bool "IEEE FPU emulation" depends on MARCH_G5 diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 498bc389238..881d94590ae 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -12,10 +12,6 @@ #ifndef __ASM_HARDIRQ_H #define __ASM_HARDIRQ_H -#include <linux/threads.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> #include <asm/lowcore.h> #define local_softirq_pending() (S390_lowcore.softirq_pending) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 3840cbe7763..a75f168d271 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -4,7 +4,6 @@ * Copyright 2009 Martin Schwidefsky, IBM Corporation. */ -static inline void set_perf_event_pending(void) {} -static inline void clear_perf_event_pending(void) {} +/* Empty, just to avoid compiling error */ #define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index cef66210c84..38ddd8a9a9e 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -97,7 +97,6 @@ static inline void restore_access_regs(unsigned int *acrs) extern void account_vtime(struct task_struct *, struct task_struct *); extern void account_tick_vtime(struct task_struct *); -extern void account_system_vtime(struct task_struct *); #ifdef CONFIG_PFAULT extern void pfault_irq_init(void); diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 831bd033ea7..051107a2c5e 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -3,15 +3,32 @@ #include <linux/cpumask.h> -#define mc_capable() (1) - -const struct cpumask *cpu_coregroup_mask(unsigned int cpu); - extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; +static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + #define topology_core_id(cpu) (cpu_core_id[cpu]) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define mc_capable() (1) + +#ifdef CONFIG_SCHED_BOOK + +extern unsigned char cpu_book_id[NR_CPUS]; +extern cpumask_t cpu_book_map[NR_CPUS]; + +static inline const struct cpumask *cpu_book_mask(unsigned int cpu) +{ + return &cpu_book_map[cpu]; +} + +#define topology_book_id(cpu) (cpu_book_id[cpu]) +#define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) + +#endif /* CONFIG_SCHED_BOOK */ int topology_set_cpu_management(int fc); void topology_schedule_update(void); @@ -30,6 +47,8 @@ static inline void s390_init_cpu_topology(void) }; #endif +#define SD_BOOK_INIT SD_CPU_INIT + #include <asm-generic/topology.h> #endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c35..f7167ee4604 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bcef00766a6..13559c99384 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -57,8 +57,8 @@ struct tl_info { union tl_entry tle[0]; }; -struct core_info { - struct core_info *next; +struct mask_info { + struct mask_info *next; unsigned char id; cpumask_t mask; }; @@ -66,7 +66,6 @@ struct core_info { static int topology_enabled; static void topology_work_fn(struct work_struct *work); static struct tl_info *tl_info; -static struct core_info core_info; static int machine_has_topology; static struct timer_list topology_timer; static void set_topology_timer(void); @@ -74,38 +73,37 @@ static DECLARE_WORK(topology_work, topology_work_fn); /* topology_lock protects the core linked list */ static DEFINE_SPINLOCK(topology_lock); +static struct mask_info core_info; cpumask_t cpu_core_map[NR_CPUS]; unsigned char cpu_core_id[NR_CPUS]; -static cpumask_t cpu_coregroup_map(unsigned int cpu) +#ifdef CONFIG_SCHED_BOOK +static struct mask_info book_info; +cpumask_t cpu_book_map[NR_CPUS]; +unsigned char cpu_book_id[NR_CPUS]; +#endif + +static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { - struct core_info *core = &core_info; - unsigned long flags; cpumask_t mask; cpus_clear(mask); if (!topology_enabled || !machine_has_topology) return cpu_possible_map; - spin_lock_irqsave(&topology_lock, flags); - while (core) { - if (cpu_isset(cpu, core->mask)) { - mask = core->mask; + while (info) { + if (cpu_isset(cpu, info->mask)) { + mask = info->mask; break; } - core = core->next; + info = info->next; } - spin_unlock_irqrestore(&topology_lock, flags); if (cpus_empty(mask)) mask = cpumask_of_cpu(cpu); return mask; } -const struct cpumask *cpu_coregroup_mask(unsigned int cpu) -{ - return &cpu_core_map[cpu]; -} - -static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) +static void add_cpus_to_mask(struct tl_cpu *tl_cpu, struct mask_info *book, + struct mask_info *core) { unsigned int cpu; @@ -117,23 +115,35 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; for_each_present_cpu(lcpu) { - if (cpu_logical_map(lcpu) == rcpu) { - cpu_set(lcpu, core->mask); - cpu_core_id[lcpu] = core->id; - smp_cpu_polarization[lcpu] = tl_cpu->pp; - } + if (cpu_logical_map(lcpu) != rcpu) + continue; +#ifdef CONFIG_SCHED_BOOK + cpu_set(lcpu, book->mask); + cpu_book_id[lcpu] = book->id; +#endif + cpu_set(lcpu, core->mask); + cpu_core_id[lcpu] = core->id; + smp_cpu_polarization[lcpu] = tl_cpu->pp; } } } -static void clear_cores(void) +static void clear_masks(void) { - struct core_info *core = &core_info; + struct mask_info *info; - while (core) { - cpus_clear(core->mask); - core = core->next; + info = &core_info; + while (info) { + cpus_clear(info->mask); + info = info->next; + } +#ifdef CONFIG_SCHED_BOOK + info = &book_info; + while (info) { + cpus_clear(info->mask); + info = info->next; } +#endif } static union tl_entry *next_tle(union tl_entry *tle) @@ -146,29 +156,36 @@ static union tl_entry *next_tle(union tl_entry *tle) static void tl_to_cores(struct tl_info *info) { +#ifdef CONFIG_SCHED_BOOK + struct mask_info *book = &book_info; +#else + struct mask_info *book = NULL; +#endif + struct mask_info *core = &core_info; union tl_entry *tle, *end; - struct core_info *core = &core_info; + spin_lock_irq(&topology_lock); - clear_cores(); + clear_masks(); tle = info->tle; end = (union tl_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { - case 5: - case 4: - case 3: +#ifdef CONFIG_SCHED_BOOK case 2: + book = book->next; + book->id = tle->container.id; break; +#endif case 1: core = core->next; core->id = tle->container.id; break; case 0: - add_cpus_to_core(&tle->cpu, core); + add_cpus_to_mask(&tle->cpu, book, core); break; default: - clear_cores(); + clear_masks(); machine_has_topology = 0; goto out; } @@ -221,10 +238,29 @@ int topology_set_cpu_management(int fc) static void update_cpu_core_map(void) { + unsigned long flags; int cpu; - for_each_possible_cpu(cpu) - cpu_core_map[cpu] = cpu_coregroup_map(cpu); + spin_lock_irqsave(&topology_lock, flags); + for_each_possible_cpu(cpu) { + cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); +#ifdef CONFIG_SCHED_BOOK + cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); +#endif + } + spin_unlock_irqrestore(&topology_lock, flags); +} + +static void store_topology(struct tl_info *info) +{ +#ifdef CONFIG_SCHED_BOOK + int rc; + + rc = stsi(info, 15, 1, 3); + if (rc != -ENOSYS) + return; +#endif + stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) @@ -238,7 +274,7 @@ int arch_update_cpu_topology(void) topology_update_polarization_simple(); return 0; } - stsi(info, 15, 1, 2); + store_topology(info); tl_to_cores(info); update_cpu_core_map(); for_each_online_cpu(cpu) { @@ -299,12 +335,24 @@ out: } __initcall(init_topology_update); +static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = alloc_bootmem(sizeof(struct mask_info)); + mask = mask->next; + } +} + void __init s390_init_cpu_topology(void) { unsigned long long facility_bits; struct tl_info *info; - struct core_info *core; - int nr_cores; int i; if (stfle(&facility_bits, 1) <= 0) @@ -315,25 +363,13 @@ void __init s390_init_cpu_topology(void) tl_info = alloc_bootmem_pages(PAGE_SIZE); info = tl_info; - stsi(info, 15, 1, 2); - - nr_cores = info->mag[NR_MAG - 2]; - for (i = 0; i < info->mnest - 2; i++) - nr_cores *= info->mag[NR_MAG - 3 - i]; - + store_topology(info); pr_info("The CPU configuration topology of the machine is:"); for (i = 0; i < NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); - - core = &core_info; - for (i = 0; i < nr_cores; i++) { - core->next = alloc_bootmem(sizeof(struct core_info)); - core = core->next; - if (!core) - goto error; - } - return; -error: - machine_has_topology = 0; + alloc_masks(info, &core_info, 2); +#ifdef CONFIG_SCHED_BOOK + alloc_masks(info, &book_info, 3); +#endif } diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 33990fa95af..35b6879628a 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,6 +16,7 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_KERNEL_GZIP @@ -249,6 +250,11 @@ config ARCH_SHMOBILE select PM select PM_RUNTIME +config CPU_HAS_PMU + depends on CPU_SH4 || CPU_SH4A + default y + bool + if SUPERH32 choice @@ -738,6 +744,14 @@ config GUSA_RB LLSC, this should be more efficient than the other alternative of disabling interrupts around the atomic sequence. +config HW_PERF_EVENTS + bool "Enable hardware performance counter support for perf events" + depends on PERF_EVENTS && CPU_HAS_PMU + default y + help + Enable hardware performance counter support for perf events. If + disabled, perf events will use software events only. + source "drivers/sh/Kconfig" endmenu diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h index 3d0c9f36d15..14308bed7ea 100644 --- a/arch/sh/include/asm/perf_event.h +++ b/arch/sh/include/asm/perf_event.h @@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *); extern int reserve_pmc_hardware(void); extern void release_pmc_hardware(void); -static inline void set_perf_event_pending(void) -{ - /* Nothing to see here, move along. */ -} - -#define PERF_EVENT_INDEX_OFFSET 0 - #endif /* __ASM_SH_PERF_EVENT_H */ diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddfe4c0..ae0be697a89 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index a9dd3abde28..d5ca1ef50fa 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include <asm/unwinder.h> #include <asm/ptrace.h> -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7a3dc356725..5a4b3343565 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void) return !!sh_pmu; } +const char *perf_pmu_name(void) +{ + if (!sh_pmu) + return NULL; + + return sh_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + if (!sh_pmu) + return 0; + + return sh_pmu->num_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + /* * Release the PMU if this is the last perf_event. */ @@ -206,50 +224,80 @@ again: local64_add(delta, &event->count); } -static void sh_pmu_disable(struct perf_event *event) +static void sh_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - clear_bit(idx, cpuc->active_mask); - sh_pmu->disable(hwc, idx); + if (!(event->hw.state & PERF_HES_STOPPED)) { + sh_pmu->disable(hwc, idx); + cpuc->events[idx] = NULL; + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + sh_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sh_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - barrier(); + cpuc->events[idx] = event; + event->hw.state = 0; + sh_pmu->enable(hwc, idx); +} - sh_perf_event_update(event, &event->hw, idx); +static void sh_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + sh_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, cpuc->used_mask); perf_event_update_userpage(event); } -static int sh_pmu_enable(struct perf_event *event) +static int sh_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + int ret = -EAGAIN; + + perf_pmu_disable(event->pmu); - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (__test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) - return -EAGAIN; + goto out; - set_bit(idx, cpuc->used_mask); + __set_bit(idx, cpuc->used_mask); hwc->idx = idx; } sh_pmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); - - sh_pmu->enable(hwc, idx); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + sh_pmu_start(event, PERF_EF_RELOAD); perf_event_update_userpage(event); - - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } static void sh_pmu_read(struct perf_event *event) @@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static const struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if (unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; +} + +static void sh_pmu_enable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +static void sh_pmu_disable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); } +static struct pmu pmu = { + .pmu_enable = sh_pmu_enable, + .pmu_disable = sh_pmu_disable, + .event_init = sh_pmu_event_init, + .add = sh_pmu_add, + .del = sh_pmu_del, + .start = sh_pmu_start, + .stop = sh_pmu_stop, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -void hw_perf_enable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->enable_all(); -} - -void hw_perf_disable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->disable_all(); -} - -int __cpuinit register_sh_pmu(struct sh_pmu *pmu) +int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) { if (sh_pmu) return -EBUSY; - sh_pmu = pmu; + sh_pmu = _pmu; - pr_info("Performance Events: %s support registered\n", pmu->name); + pr_info("Performance Events: %s support registered\n", _pmu->name); - WARN_ON(pmu->num_events > MAX_HWEVENTS); + WARN_ON(_pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sh/oprofile/Makefile b/arch/sh/oprofile/Makefile index 4886c5c1786..e85aae73e3d 100644 --- a/arch/sh/oprofile/Makefile +++ b/arch/sh/oprofile/Makefile @@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) +ifeq ($(CONFIG_HW_PERF_EVENTS),y) +DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o) +endif + oprofile-y := $(DRIVER_OBJS) common.o backtrace.o diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c index ac604937f3e..e10d89376f9 100644 --- a/arch/sh/oprofile/common.c +++ b/arch/sh/oprofile/common.c @@ -17,114 +17,45 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/smp.h> +#include <linux/perf_event.h> #include <asm/processor.h> -#include "op_impl.h" - -static struct op_sh_model *model; - -static struct op_counter_config ctr[20]; +#ifdef CONFIG_HW_PERF_EVENTS extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); -static int op_sh_setup(void) -{ - /* Pre-compute the values to stuff in the hardware registers. */ - model->reg_setup(ctr); - - /* Configure the registers on all cpus. */ - on_each_cpu(model->cpu_setup, NULL, 1); - - return 0; -} - -static int op_sh_create_files(struct super_block *sb, struct dentry *root) +char *op_name_from_perf_id(void) { - int i, ret = 0; + const char *pmu; + char buf[20]; + int size; - for (i = 0; i < model->num_counters; i++) { - struct dentry *dir; - char buf[4]; + pmu = perf_pmu_name(); + if (!pmu) + return NULL; - snprintf(buf, sizeof(buf), "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); + size = snprintf(buf, sizeof(buf), "sh/%s", pmu); + if (size > -1 && size < sizeof(buf)) + return buf; - ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); - ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); - ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); - ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); - - if (model->create_files) - ret |= model->create_files(sb, dir); - else - ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); - - /* Dummy entries */ - ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); - } - - return ret; + return NULL; } -static int op_sh_start(void) +int __init oprofile_arch_init(struct oprofile_operations *ops) { - /* Enable performance monitoring for all counters. */ - on_each_cpu(model->cpu_start, NULL, 1); + ops->backtrace = sh_backtrace; - return 0; + return oprofile_perf_init(ops); } -static void op_sh_stop(void) +void __exit oprofile_arch_exit(void) { - /* Disable performance monitoring for all counters. */ - on_each_cpu(model->cpu_stop, NULL, 1); + oprofile_perf_exit(); } - +#else int __init oprofile_arch_init(struct oprofile_operations *ops) { - struct op_sh_model *lmodel = NULL; - int ret; - - /* - * Always assign the backtrace op. If the counter initialization - * fails, we fall back to the timer which will still make use of - * this. - */ - ops->backtrace = sh_backtrace; - - /* - * XXX - * - * All of the SH7750/SH-4A counters have been converted to perf, - * this infrastructure hook is left for other users until they've - * had a chance to convert over, at which point all of this - * will be deleted. - */ - - if (!lmodel) - return -ENODEV; - if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER)) - return -ENODEV; - - ret = lmodel->init(); - if (unlikely(ret != 0)) - return ret; - - model = lmodel; - - ops->setup = op_sh_setup; - ops->create_files = op_sh_create_files; - ops->start = op_sh_start; - ops->stop = op_sh_stop; - ops->cpu_type = lmodel->cpu_type; - - printk(KERN_INFO "oprofile: using %s performance monitoring.\n", - lmodel->cpu_type); - - return 0; -} - -void oprofile_arch_exit(void) -{ - if (model && model->exit) - model->exit(); + pr_info("oprofile: hardware counters not available\n"); + return -ENODEV; } +void __exit oprofile_arch_exit(void) {} +#endif /* CONFIG_HW_PERF_EVENTS */ diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h deleted file mode 100644 index 1244479ceb2..00000000000 --- a/arch/sh/oprofile/op_impl.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef __OP_IMPL_H -#define __OP_IMPL_H - -/* Per-counter configuration as set via oprofilefs. */ -struct op_counter_config { - unsigned long enabled; - unsigned long event; - - unsigned long count; - - /* Dummy values for userspace tool compliance */ - unsigned long kernel; - unsigned long user; - unsigned long unit_mask; -}; - -/* Per-architecture configury and hooks. */ -struct op_sh_model { - void (*reg_setup)(struct op_counter_config *); - int (*create_files)(struct super_block *sb, struct dentry *dir); - void (*cpu_setup)(void *dummy); - int (*init)(void); - void (*exit)(void); - void (*cpu_start)(void *args); - void (*cpu_stop)(void *args); - char *cpu_type; - unsigned char num_counters; -}; - -/* arch/sh/oprofile/common.c */ -extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); - -#endif /* __OP_IMPL_H */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 491e9d6de19..3e9d31401fb 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,10 +26,12 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG + select HAVE_ARCH_JUMP_LABEL config SPARC32 def_bool !64BIT @@ -53,6 +55,7 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h new file mode 100644 index 00000000000..62e66d7b2fb --- /dev/null +++ b/arch/sparc/include/asm/jump_label.h @@ -0,0 +1,32 @@ +#ifndef _ASM_SPARC_JUMP_LABEL_H +#define _ASM_SPARC_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/system.h> + +#define JUMP_LABEL_NOP_SIZE 4 + +#define JUMP_LABEL(key, label) \ + do { \ + asm goto("1:\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".pushsection __jump_table, \"a\"\n\t"\ + ".word 1b, %l[" #label "], %c0\n\t" \ + ".popsection \n\t" \ + : : "i" (key) : : label);\ + } while (0) + +#endif /* __KERNEL__ */ + +typedef u32 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 727af70646c..6e8bfa1786d 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -1,10 +1,6 @@ #ifndef __ASM_SPARC_PERF_EVENT_H #define __ASM_SPARC_PERF_EVENT_H -extern void set_perf_event_pending(void); - -#define PERF_EVENT_INDEX_OFFSET 0 - #ifdef CONFIG_PERF_EVENTS #include <asm/ptrace.h> diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 0c2dc1f24a9..599398fbbc7 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y) pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) + +obj-$(CONFIG_SPARC64) += jump_label.o diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c new file mode 100644 index 00000000000..ea2dafc93d7 --- /dev/null +++ b/arch/sparc/kernel/jump_label.c @@ -0,0 +1,47 @@ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/cpu.h> + +#include <linux/jump_label.h> +#include <linux/memory.h> + +#ifdef HAVE_JUMP_LABEL + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + u32 val; + u32 *insn = (u32 *) (unsigned long) entry->code; + + if (type == JUMP_LABEL_ENABLE) { + s32 off = (s32)entry->target - (s32)entry->code; + +#ifdef CONFIG_SPARC64 + /* ba,pt %xcc, . + (off << 2) */ + val = 0x10680000 | ((u32) off >> 2); +#else + /* ba . + (off << 2) */ + val = 0x10800000 | ((u32) off >> 2); +#endif + } else { + val = 0x01000000; + } + + get_online_cpus(); + mutex_lock(&text_mutex); + *insn = val; + flushi(insn); + mutex_unlock(&text_mutex); + put_online_cpus(); +} + +void arch_jump_label_text_poke_early(jump_label_t addr) +{ + u32 *insn_p = (u32 *) (unsigned long) addr; + + *insn_p = 0x01000000; + flushi(insn_p); +} + +#endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index f848aadf54d..ee3c7dde8d9 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -18,6 +18,9 @@ #include <asm/spitfire.h> #ifdef CONFIG_SPARC64 + +#include <linux/jump_label.h> + static void *module_map(unsigned long size) { struct vm_struct *area; @@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + /* make jump label nops */ + jump_label_apply_nops(me); + /* Cheetah's I-cache is fully coherent. */ if (tlb_type == spitfire) { unsigned long va; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index c4a6a50b484..b87873c0e8e 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -7,7 +7,7 @@ #include <linux/init.h> #include <linux/irq.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <linux/ftrace.h> #include <asm/pil.h> @@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_PERF_EVENTS - perf_event_do_pending(); +#ifdef CONFIG_IRQ_WORK + irq_work_run(); #endif irq_exit(); set_irq_regs(old_regs); } -void set_perf_event_pending(void) +void arch_irq_work_raise(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 357ced3c33f..0d6deb55a2a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) enc = perf_event_get_enc(cpuc->events[i]); pcr &= ~mask_for_index(idx); - pcr |= event_encoding(enc, idx); + if (hwc->state & PERF_HES_STOPPED) + pcr |= nop_for_index(idx); + else + pcr |= event_encoding(enc, idx); } out: return pcr; } -void hw_perf_enable(void) +static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +694,7 @@ void hw_perf_enable(void) pcr_ops->write(cpuc->pcr); } -void hw_perf_disable(void) +static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -710,19 +713,65 @@ void hw_perf_disable(void) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_disable(struct perf_event *event) +static int active_event_index(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + int i; + + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event[i] == event) + break; + } + BUG_ON(i == cpuc->n_events); + return cpuc->current_idx[i]; +} + +static void sparc_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + sparc_perf_event_set_period(event, &event->hw, idx); + } + + event->hw.state = 0; + + sparc_pmu_enable_event(cpuc, &event->hw, idx); +} + +static void sparc_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (!(event->hw.state & PERF_HES_STOPPED)) { + sparc_pmu_disable_event(cpuc, &event->hw, idx); + event->hw.state |= PERF_HES_STOPPED; + } + + if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { + sparc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sparc_pmu_del(struct perf_event *event, int _flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; unsigned long flags; int i; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { - int idx = cpuc->current_idx[i]; + /* Absorb the final count and turn off the + * event. + */ + sparc_pmu_stop(event, PERF_EF_UPDATE); /* Shift remaining entries down into * the existing slot. @@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) cpuc->current_idx[i]; } - /* Absorb the final count and turn off the - * event. - */ - sparc_pmu_disable_event(cpuc, hwc, idx); - barrier(); - sparc_perf_event_update(event, hwc, idx); - perf_event_update_userpage(event); cpuc->n_events--; @@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event) } } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static int active_event_index(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - int i; - - for (i = 0; i < cpuc->n_events; i++) { - if (cpuc->event[i] == event) - break; - } - BUG_ON(i == cpuc->n_events); - return cpuc->current_idx[i]; -} - static void sparc_pmu_read(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) sparc_perf_event_update(event, hwc, idx); } -static void sparc_pmu_unthrottle(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = active_event_index(cpuc, event); - struct hw_perf_event *hwc = &event->hw; - - sparc_pmu_enable_event(cpuc, hwc, idx); -} - static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); @@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > perf_max_events) + if (n_ev > MAX_HWEVENTS) return -1; msk0 = perf_event_get_msk(events[0]); @@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static int sparc_pmu_enable(struct perf_event *event) +static int sparc_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0, ret = -EAGAIN; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= perf_max_events) + if (n0 >= MAX_HWEVENTS) goto out; cpuc->event[n0] = event; cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; + event->hw.state = PERF_HES_UPTODATE; + if (!(ef_flags & PERF_EF_START)) + event->hw.state |= PERF_HES_STOPPED; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -1020,12 +1044,12 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,16 +1062,37 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else - return -EOPNOTSUPP; + break; + + case PERF_TYPE_RAW: + pmap = NULL; + break; + + default: + return -ENOENT; + + } + + if (pmap) { + hwc->event_base = perf_event_encode(pmap); + } else { + /* + * User gives us "(encoding << 16) | pic_mask" for + * PERF_TYPE_RAW events. + */ + hwc->event_base = attr->config; + } /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; @@ -1058,12 +1103,10 @@ static int __hw_perf_event_init(struct perf_event *event) if (!attr->exclude_hv) hwc->config_base |= sparc_pmu->hv_bit; - hwc->event_base = perf_event_encode(pmap); - n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - perf_max_events - 1, + MAX_HWEVENTS - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1099,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(const struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1111,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void sparc_pmu_cancel_txn(const struct pmu *pmu) +static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -1123,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int sparc_pmu_commit_txn(const struct pmu *pmu) +static int sparc_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n; @@ -1139,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, +static struct pmu pmu = { + .pmu_enable = sparc_pmu_enable, + .pmu_disable = sparc_pmu_disable, + .event_init = sparc_pmu_event_init, + .add = sparc_pmu_add, + .del = sparc_pmu_del, + .start = sparc_pmu_start, + .stop = sparc_pmu_stop, .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, .start_txn = sparc_pmu_start_txn, .cancel_txn = sparc_pmu_cancel_txn, .commit_txn = sparc_pmu_commit_txn, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1236,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(cpuc, hwc, idx); + sparc_pmu_stop(event, 0); } return NOTIFY_STOP; @@ -1277,28 +1318,21 @@ void __init init_hw_perf_events(void) pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - /* All sparc64 PMUs currently have 2 events. */ - perf_max_events = 2; - + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + stack_trace_flush(); + + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1322,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1336,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1355,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1378,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index ea22cd373c6..75fad425e24 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -453,8 +453,66 @@ static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) return err; } -static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, - int signo, sigset_t *oldset) +/* The I-cache flush instruction only works in the primary ASI, which + * right now is the nucleus, aka. kernel space. + * + * Therefore we have to kick the instructions out using the kernel + * side linear mapping of the physical address backing the user + * instructions. + */ +static void flush_signal_insns(unsigned long address) +{ + unsigned long pstate, paddr; + pte_t *ptep, pte; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + + /* Commit all stores of the instructions we are about to flush. */ + wmb(); + + /* Disable cross-call reception. In this way even a very wide + * munmap() on another cpu can't tear down the page table + * hierarchy from underneath us, since that can't complete + * until the IPI tlb flush returns. + */ + + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + __asm__ __volatile__("wrpr %0, %1, %%pstate" + : : "r" (pstate), "i" (PSTATE_IE)); + + pgdp = pgd_offset(current->mm, address); + if (pgd_none(*pgdp)) + goto out_irqs_on; + pudp = pud_offset(pgdp, address); + if (pud_none(*pudp)) + goto out_irqs_on; + pmdp = pmd_offset(pudp, address); + if (pmd_none(*pmdp)) + goto out_irqs_on; + + ptep = pte_offset_map(pmdp, address); + pte = *ptep; + if (!pte_present(pte)) + goto out_unmap; + + paddr = (unsigned long) page_address(pte_page(pte)); + + __asm__ __volatile__("flush %0 + %1" + : /* no outputs */ + : "r" (paddr), + "r" (address & (PAGE_SIZE - 1)) + : "memory"); + +out_unmap: + pte_unmap(ptep); +out_irqs_on: + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); + +} + +static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset) { struct signal_frame32 __user *sf; int sigframe_size; @@ -547,13 +605,7 @@ static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (ka->ka_restorer) { regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer; } else { - /* Flush instruction space. */ unsigned long address = ((unsigned long)&(sf->insns[0])); - pgd_t *pgdp = pgd_offset(current->mm, address); - pud_t *pudp = pud_offset(pgdp, address); - pmd_t *pmdp = pmd_offset(pudp, address); - pte_t *ptep; - pte_t pte; regs->u_regs[UREG_I7] = (unsigned long) (&(sf->insns[0]) - 2); @@ -562,34 +614,22 @@ static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (err) goto sigsegv; - preempt_disable(); - ptep = pte_offset_map(pmdp, address); - pte = *ptep; - if (pte_present(pte)) { - unsigned long page = (unsigned long) - page_address(pte_page(pte)); - - wmb(); - __asm__ __volatile__("flush %0 + %1" - : /* no outputs */ - : "r" (page), - "r" (address & (PAGE_SIZE - 1)) - : "memory"); - } - pte_unmap(ptep); - preempt_enable(); + flush_signal_insns(address); } - return; + return 0; sigill: do_exit(SIGILL); + return -EINVAL; + sigsegv: force_sigsegv(signo, current); + return -EFAULT; } -static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, - unsigned long signr, sigset_t *oldset, - siginfo_t *info) +static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, + unsigned long signr, sigset_t *oldset, + siginfo_t *info) { struct rt_signal_frame32 __user *sf; int sigframe_size; @@ -687,12 +727,7 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (ka->ka_restorer) regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer; else { - /* Flush instruction space. */ unsigned long address = ((unsigned long)&(sf->insns[0])); - pgd_t *pgdp = pgd_offset(current->mm, address); - pud_t *pudp = pud_offset(pgdp, address); - pmd_t *pmdp = pmd_offset(pudp, address); - pte_t *ptep; regs->u_regs[UREG_I7] = (unsigned long) (&(sf->insns[0]) - 2); @@ -704,38 +739,32 @@ static void setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (err) goto sigsegv; - preempt_disable(); - ptep = pte_offset_map(pmdp, address); - if (pte_present(*ptep)) { - unsigned long page = (unsigned long) - page_address(pte_page(*ptep)); - - wmb(); - __asm__ __volatile__("flush %0 + %1" - : /* no outputs */ - : "r" (page), - "r" (address & (PAGE_SIZE - 1)) - : "memory"); - } - pte_unmap(ptep); - preempt_enable(); + flush_signal_insns(address); } - return; + return 0; sigill: do_exit(SIGILL); + return -EINVAL; + sigsegv: force_sigsegv(signr, current); + return -EFAULT; } -static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka, - siginfo_t *info, - sigset_t *oldset, struct pt_regs *regs) +static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka, + siginfo_t *info, + sigset_t *oldset, struct pt_regs *regs) { + int err; + if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame32(ka, regs, signr, oldset, info); + err = setup_rt_frame32(ka, regs, signr, oldset, info); else - setup_frame32(ka, regs, signr, oldset); + err = setup_frame32(ka, regs, signr, oldset); + + if (err) + return err; spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); @@ -743,6 +772,10 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka, sigaddset(¤t->blocked,signr); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(signr, info, ka, regs, 0); + + return 0; } static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs, @@ -789,16 +822,14 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs, if (signr > 0) { if (restart_syscall) syscall_restart32(orig_i0, regs, &ka.sa); - handle_signal32(signr, &ka, &info, oldset, regs); - - /* A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TS_RESTORE_SIGMASK flag. - */ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - - tracehook_signal_handler(signr, &info, &ka, regs, 0); + if (handle_signal32(signr, &ka, &info, oldset, regs) == 0) { + /* A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + } return; } if (restart_syscall && @@ -809,12 +840,14 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs, regs->u_regs[UREG_I0] = orig_i0; regs->tpc -= 4; regs->tnpc -= 4; + pt_regs_clear_syscall(regs); } if (restart_syscall && regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; regs->tnpc -= 4; + pt_regs_clear_syscall(regs); } /* If there's no signal to deliver, we just put the saved sigmask diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 9882df92ba0..5e5c5fd0378 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -315,8 +315,8 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) return err; } -static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs, - int signo, sigset_t *oldset) +static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset) { struct signal_frame __user *sf; int sigframe_size, err; @@ -384,16 +384,19 @@ static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs, /* Flush instruction space. */ flush_sig_insns(current->mm, (unsigned long) &(sf->insns[0])); } - return; + return 0; sigill_and_return: do_exit(SIGILL); + return -EINVAL; + sigsegv: force_sigsegv(signo, current); + return -EFAULT; } -static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, - int signo, sigset_t *oldset, siginfo_t *info) +static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, + int signo, sigset_t *oldset, siginfo_t *info) { struct rt_signal_frame __user *sf; int sigframe_size; @@ -466,22 +469,30 @@ static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, /* Flush instruction space. */ flush_sig_insns(current->mm, (unsigned long) &(sf->insns[0])); } - return; + return 0; sigill: do_exit(SIGILL); + return -EINVAL; + sigsegv: force_sigsegv(signo, current); + return -EFAULT; } -static inline void +static inline int handle_signal(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + int err; + if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(ka, regs, signr, oldset, info); + err = setup_rt_frame(ka, regs, signr, oldset, info); else - setup_frame(ka, regs, signr, oldset); + err = setup_frame(ka, regs, signr, oldset); + + if (err) + return err; spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); @@ -489,6 +500,10 @@ handle_signal(unsigned long signr, struct k_sigaction *ka, sigaddset(¤t->blocked, signr); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(signr, info, ka, regs, 0); + + return 0; } static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, @@ -546,17 +561,15 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) if (signr > 0) { if (restart_syscall) syscall_restart(orig_i0, regs, &ka.sa); - handle_signal(signr, &ka, &info, oldset, regs); - - /* a signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag. - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - - tracehook_signal_handler(signr, &info, &ka, regs, 0); + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) { + /* a signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag. + */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + } return; } if (restart_syscall && @@ -567,12 +580,14 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->u_regs[UREG_I0] = orig_i0; regs->pc -= 4; regs->npc -= 4; + pt_regs_clear_syscall(regs); } if (restart_syscall && regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->pc -= 4; regs->npc -= 4; + pt_regs_clear_syscall(regs); } /* if there's no signal to deliver, we just put the saved sigmask diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 9fa48c30037..006fe451588 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -409,7 +409,7 @@ static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs * return (void __user *) sp; } -static inline void +static inline int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset, siginfo_t *info) { @@ -483,26 +483,37 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, } /* 4. return to kernel instructions */ regs->u_regs[UREG_I7] = (unsigned long)ka->ka_restorer; - return; + return 0; sigill: do_exit(SIGILL); + return -EINVAL; + sigsegv: force_sigsegv(signo, current); + return -EFAULT; } -static inline void handle_signal(unsigned long signr, struct k_sigaction *ka, - siginfo_t *info, - sigset_t *oldset, struct pt_regs *regs) +static inline int handle_signal(unsigned long signr, struct k_sigaction *ka, + siginfo_t *info, + sigset_t *oldset, struct pt_regs *regs) { - setup_rt_frame(ka, regs, signr, oldset, - (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL); + int err; + + err = setup_rt_frame(ka, regs, signr, oldset, + (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL); + if (err) + return err; spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NOMASK)) sigaddset(¤t->blocked,signr); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(signr, info, ka, regs, 0); + + return 0; } static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, @@ -571,16 +582,14 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) if (signr > 0) { if (restart_syscall) syscall_restart(orig_i0, regs, &ka.sa); - handle_signal(signr, &ka, &info, oldset, regs); - - /* A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TS_RESTORE_SIGMASK flag. - */ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - - tracehook_signal_handler(signr, &info, &ka, regs, 0); + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) { + /* A signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TS_RESTORE_SIGMASK flag. + */ + current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + } return; } if (restart_syscall && @@ -591,12 +600,14 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->u_regs[UREG_I0] = orig_i0; regs->tpc -= 4; regs->tnpc -= 4; + pt_regs_clear_syscall(regs); } if (restart_syscall && regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; regs->tnpc -= 4; + pt_regs_clear_syscall(regs); } /* If there's no signal to deliver, we just put the saved sigmask diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 84f296ca9e6..8f58bdff20d 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1506,13 +1506,6 @@ handle_ill: } STD_ENDPROC(handle_ill) - .pushsection .rodata, "a" - .align 8 -bpt_code: - bpt - ENDPROC(bpt_code) - .popsection - /* Various stub interrupt handlers and syscall handlers */ STD_ENTRY_LOCAL(_kernel_double_fault) diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 0c46e398cd8..63c740a85b4 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -40,6 +40,11 @@ static char *mixer = HOSTAUDIO_DEV_MIXER; " This is used to specify the host mixer device to the hostaudio driver.\n"\ " The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" +module_param(dsp, charp, 0644); +MODULE_PARM_DESC(dsp, DSP_HELP); +module_param(mixer, charp, 0644); +MODULE_PARM_DESC(mixer, MIXER_HELP); + #ifndef MODULE static int set_dsp(char *name, int *add) { @@ -56,15 +61,6 @@ static int set_mixer(char *name, int *add) } __uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); - -#else /*MODULE*/ - -module_param(dsp, charp, 0644); -MODULE_PARM_DESC(dsp, DSP_HELP); - -module_param(mixer, charp, 0644); -MODULE_PARM_DESC(mixer, MIXER_HELP); - #endif /* /dev/dsp file operations */ diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 2ab233ba32c..47d0c37897d 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -255,18 +255,6 @@ static void uml_net_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int uml_net_set_mac(struct net_device *dev, void *addr) -{ - struct uml_net_private *lp = netdev_priv(dev); - struct sockaddr *hwaddr = addr; - - spin_lock_irq(&lp->lock); - eth_mac_addr(dev, hwaddr->sa_data); - spin_unlock_irq(&lp->lock); - - return 0; -} - static int uml_net_change_mtu(struct net_device *dev, int new_mtu) { dev->mtu = new_mtu; @@ -373,7 +361,7 @@ static const struct net_device_ops uml_netdev_ops = { .ndo_start_xmit = uml_net_start_xmit, .ndo_set_multicast_list = uml_net_set_multicast_list, .ndo_tx_timeout = uml_net_tx_timeout, - .ndo_set_mac_address = uml_net_set_mac, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = uml_net_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -472,7 +460,8 @@ static void eth_configure(int n, void *init, char *mac, ((*transport->user->init)(&lp->user, dev) != 0)) goto out_unregister; - eth_mac_addr(dev, device->mac); + /* don't use eth_mac_addr, it will not work here */ + memcpy(dev->dev_addr, device->mac, ETH_ALEN); dev->mtu = transport->user->mtu; dev->netdev_ops = ¨_netdev_ops; dev->ethtool_ops = ¨_net_ethtool_ops; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 1bcd208c459..9734994cba1 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -163,6 +163,7 @@ struct ubd { struct scatterlist sg[MAX_SG]; struct request *request; int start_sg, end_sg; + sector_t rq_pos; }; #define DEFAULT_COW { \ @@ -187,6 +188,7 @@ struct ubd { .request = NULL, \ .start_sg = 0, \ .end_sg = 0, \ + .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -1228,7 +1230,6 @@ static void do_ubd_request(struct request_queue *q) { struct io_thread_req *io_req; struct request *req; - sector_t sector; int n; while(1){ @@ -1239,12 +1240,12 @@ static void do_ubd_request(struct request_queue *q) return; dev->request = req; + dev->rq_pos = blk_rq_pos(req); dev->start_sg = 0; dev->end_sg = blk_rq_map_sg(q, req, dev->sg); } req = dev->request; - sector = blk_rq_pos(req); while(dev->start_sg < dev->end_sg){ struct scatterlist *sg = &dev->sg[dev->start_sg]; @@ -1256,10 +1257,9 @@ static void do_ubd_request(struct request_queue *q) return; } prepare_request(req, io_req, - (unsigned long long)sector << 9, + (unsigned long long)dev->rq_pos << 9, sg->offset, sg->length, sg_page(sg)); - sector += sg->length >> 9; n = os_write_file(thread_fd, &io_req, sizeof(struct io_thread_req *)); if(n != sizeof(struct io_thread_req *)){ @@ -1272,6 +1272,7 @@ static void do_ubd_request(struct request_queue *q) return; } + dev->rq_pos += sg->length >> 9; dev->start_sg++; } dev->end_sg = 0; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index cd145eda357..49b5e1eb326 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -62,7 +62,7 @@ static long execve1(const char *file, return error; } -long um_execve(const char *file, char __user *__user *argv, char __user *__user *env) +long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; @@ -72,8 +72,8 @@ long um_execve(const char *file, char __user *__user *argv, char __user *__user return err; } -long sys_execve(const char __user *file, char __user *__user *argv, - char __user *__user *env) +long sys_execve(const char __user *file, const char __user *const __user *argv, + const char __user *const __user *env) { long error; char *filename; diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h index 1303a105fe9..5bf97db24a0 100644 --- a/arch/um/kernel/internal.h +++ b/arch/um/kernel/internal.h @@ -1 +1 @@ -extern long um_execve(const char *file, char __user *__user *argv, char __user *__user *env); +extern long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env); diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 5ddb246626d..f958cb876ee 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -60,8 +60,8 @@ int kernel_execve(const char *filename, fs = get_fs(); set_fs(KERNEL_DS); - ret = um_execve(filename, (char __user *__user *)argv, - (char __user *__user *) envp); + ret = um_execve(filename, (const char __user *const __user *)argv, + (const char __user *const __user *) envp); set_fs(fs); return ret; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7fd41f0d754..a1bd569f6c5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -25,6 +25,7 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_PERF_EVENTS if (!M386 && !M486) + select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB @@ -33,6 +34,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -59,6 +61,8 @@ config X86 select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER + select HAVE_ARCH_JUMP_LABEL + select HAVE_TEXT_POKE_SMP config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -795,6 +799,17 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. +config IRQ_TIME_ACCOUNTING + bool "Fine granularity task level IRQ time accounting" + default n + ---help--- + Select this option to enable fine granularity task irq time + accounting. This is done by reading a timestamp on each + transitions between softirq and hardirq state, so there can be a + small performance impact. + + If in doubt, say N here. + source "kernel/Kconfig.preempt" config X86_UP_APIC @@ -2125,6 +2140,10 @@ config HAVE_ATOMIC_IOMAP def_bool y depends on X86_32 +config HAVE_TEXT_POKE_SMP + bool + select STOP_MACHINE if SMP + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 030f4b93e25..5df2869c874 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -58,7 +58,19 @@ static void parse_earlyprintk(void) if (arg[pos] == ',') pos++; - if (!strncmp(arg, "ttyS", 4)) { + /* + * make sure we have + * "serial,0x3f8,115200" + * "serial,ttyS0,115200" + * "ttyS0,115200" + */ + if (pos == 7 && !strncmp(arg + pos, "0x", 2)) { + port = simple_strtoull(arg + pos, &e, 16); + if (port == 0 || arg + pos == e) + port = DEFAULT_SERIAL_PORT; + else + pos = e - arg; + } else if (!strncmp(arg + pos, "ttyS", 4)) { static const int bases[] = { 0x3f8, 0x2f8 }; int idx = 0; diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 0350311906a..2d93bdbc9ac 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -34,7 +34,7 @@ #include <asm/ia32.h> #undef WARN_OLD -#undef CORE_DUMP /* probably broken */ +#undef CORE_DUMP /* definitely broken */ static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); static int load_aout_library(struct file *); @@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end) * macros to write out all the necessary info. */ -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} +#include <linux/coredump.h> #define DUMP_WRITE(addr, nr) \ if (!dump_write(file, (void *)(addr), (nr))) \ goto end_coredump; -#define DUMP_SEEK(offset) \ - if (file->f_op->llseek) { \ - if (file->f_op->llseek(file, (offset), 0) != (offset)) \ - goto end_coredump; \ - } else \ - file->f_pos = (offset) +#define DUMP_SEEK(offset) \ + if (!dump_seek(file, offset)) \ + goto end_coredump; #define START_DATA() (u.u_tsize << PAGE_SHIFT) #define START_STACK(u) (u.start_stack) @@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, dump_size = dump.u_ssize << PAGE_SHIFT; DUMP_WRITE(dump_start, dump_size); } - /* - * Finally dump the task struct. Not be used by gdb, but - * could be useful - */ - set_fs(KERNEL_DS); - DUMP_WRITE(current, sizeof(*current)); end_coredump: set_fs(fs); return has_dumped; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index bc6abb7bc7e..76561d20ea2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> +#include <linux/jump_label.h> #include <asm/asm.h> /* @@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. @@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#define IDEAL_NOP_SIZE_5 5 +extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; +extern void arch_init_ideal_nop5(void); +#else +static inline void arch_init_ideal_nop5(void) {} +#endif + #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 5af2982133b..f16a2caca1e 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index d2544f1d705..916bc8111a0 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Advanced Micro Devices, Inc. + * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * * This program is free software; you can redistribute it and/or modify it @@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { } #endif /* !CONFIG_AMD_IOMMU_STATS */ +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 7014e88bc77..e3509fc303b 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -368,6 +368,9 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* flags read from acpi table */ + u8 acpi_flags; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability @@ -411,6 +414,24 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + + /* + * We can't rely on the BIOS to restore all values on reinit, so we + * need to stash them + */ + + /* The iommu BAR */ + u32 stored_addr_lo; + u32 stored_addr_hi; + + /* + * Each iommu has 6 l1s, each of which is documented as having 0x12 + * registers + */ + u32 stored_l1[6][0x12]; + + /* The l2 indirect registers */ + u32 stored_l2[0x83]; }; /* diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 545776efeb1..bafd80defa4 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -309,7 +309,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & - (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; + (addr[nr / BITS_PER_LONG])) != 0; } static inline int variable_test_bit(int nr, volatile const unsigned long *addr) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0450a2842f4..220e2ea08e8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -172,6 +172,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 8e8ec663a98..b8e96a18676 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#ifdef CONFIG_PERF_EVENTS -BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) +#ifdef CONFIG_IRQ_WORK +BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4ac5b0f33fc..bf357f9b25f 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -17,6 +17,7 @@ extern int fix_aperture; #define GARTEN (1<<0) #define DISGARTCPU (1<<4) #define DISGARTIO (1<<5) +#define DISTLBWALKPRB (1<<6) /* GART cache control register bits. */ #define INVGART (1<<0) @@ -27,7 +28,6 @@ extern int fix_aperture; #define AMD64_GARTAPERTUREBASE 0x94 #define AMD64_GARTTABLEBASE 0x98 #define AMD64_GARTCACHECTL 0x9c -#define AMD64_GARTEN (1<<0) #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; @@ -57,6 +57,19 @@ static inline void gart_iommu_hole_init(void) extern int agp_amd64_init(void); +static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order) +{ + u32 ctl; + + /* + * Don't enable translation but enable GART IO and CPU accesses. + * Also, set DISTLBWALKPRB since GART tables memory is UC. + */ + ctl = DISTLBWALKPRB | order << 1; + + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); +} + static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) { u32 tmp, ctl; diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index aeab29aee61..55e4de613f0 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,7 +14,7 @@ typedef struct { #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; - unsigned int apic_pending_irqs; + unsigned int apic_irq_work_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 528a11e8d3e..824ca07860d 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,7 +20,7 @@ struct arch_hw_breakpoint { #include <linux/list.h> /* Available HW breakpoint length encodings */ -#define X86_BREAKPOINT_LEN_X 0x00 +#define X86_BREAKPOINT_LEN_X 0x40 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 46c0fe05f23..3a54a1ca1a0 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,7 +29,7 @@ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); extern void error_interrupt(void); -extern void perf_pending_interrupt(void); +extern void irq_work_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e2ca3009255..6af0894dafb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -114,9 +114,9 @@ #define X86_PLATFORM_IPI_VECTOR 0xed /* - * Performance monitoring pending work vector: + * IRQ work vector: */ -#define LOCAL_PENDING_VECTOR 0xec +#define IRQ_WORK_VECTOR 0xec #define UV_BAU_MESSAGE 0xea diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h new file mode 100644 index 00000000000..f52d42e8058 --- /dev/null +++ b/arch/x86/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_X86_JUMP_LABEL_H +#define _ASM_X86_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/nops.h> + +#define JUMP_LABEL_NOP_SIZE 5 + +# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +# define JUMP_LABEL(key, label) \ + do { \ + asm goto("1:" \ + JUMP_LABEL_INITIAL_NOP \ + ".pushsection __jump_table, \"a\" \n\t"\ + _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ + ".popsection \n\t" \ + : : "i" (key) : : label); \ + } while (0) + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_64 +typedef u64 jump_label_t; +#else +typedef u32 jump_label_t; +#endif + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 502e53f999c..c52e2eb40a1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -652,20 +652,6 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) return (struct kvm_mmu_page *)page_private(page); } -static inline u16 kvm_read_fs(void) -{ - u16 seg; - asm("mov %%fs, %0" : "=g"(seg)); - return seg; -} - -static inline u16 kvm_read_gs(void) -{ - u16 seg; - asm("mov %%gs, %0" : "=g"(seg)); - return seg; -} - static inline u16 kvm_read_ldt(void) { u16 ldt; @@ -673,16 +659,6 @@ static inline u16 kvm_read_ldt(void) return ldt; } -static inline void kvm_load_fs(u16 sel) -{ - asm("mov %0, %%fs" : : "rm"(sel)); -} - -static inline void kvm_load_gs(u16 sel) -{ - asm("mov %0, %%gs" : : "rm"(sel)); -} - static inline void kvm_load_ldt(u16 sel) { asm("lldt %0" : : "rm"(sel)); diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index def500776b1..a70cd216be5 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -36,19 +36,6 @@ #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) -/* Non HT mask */ -#define P4_ESCR_MASK \ - (P4_ESCR_EVENT_MASK | \ - P4_ESCR_EVENTMASK_MASK | \ - P4_ESCR_TAG_MASK | \ - P4_ESCR_TAG_ENABLE | \ - P4_ESCR_T0_OS | \ - P4_ESCR_T0_USR) - -/* HT mask */ -#define P4_ESCR_MASK_HT \ - (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) - #define P4_CCCR_OVF 0x80000000U #define P4_CCCR_CASCADE 0x40000000U #define P4_CCCR_OVF_PMI_T0 0x04000000U @@ -70,23 +57,6 @@ #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) -/* Non HT mask */ -#define P4_CCCR_MASK \ - (P4_CCCR_OVF | \ - P4_CCCR_CASCADE | \ - P4_CCCR_OVF_PMI_T0 | \ - P4_CCCR_FORCE_OVF | \ - P4_CCCR_EDGE | \ - P4_CCCR_THRESHOLD_MASK | \ - P4_CCCR_COMPLEMENT | \ - P4_CCCR_COMPARE | \ - P4_CCCR_ESCR_SELECT_MASK | \ - P4_CCCR_ENABLE) - -/* HT mask */ -#define P4_CCCR_MASK_HT \ - (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) - #define P4_GEN_ESCR_EMASK(class, name, bit) \ class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK_BIT(class, name) class##__##name @@ -127,6 +97,28 @@ #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) +/* + * The bits we allow to pass for RAW events + */ +#define P4_CONFIG_MASK_ESCR \ + P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE + +#define P4_CONFIG_MASK_CCCR \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_RESERVED + +/* some dangerous bits are reserved for kernel internals */ +#define P4_CONFIG_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ + (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) + static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 25dc82d3e76..81f37703379 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_pvclock.o = -pg +CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif @@ -32,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o -obj-y += setup.o x86_init.o i8259.o irqinit.o +obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o +obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index fb7a5f052e2..fb16f17e59b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -61,7 +61,7 @@ struct cstate_entry { unsigned int ecx; } states[ACPI_PROCESSOR_MAX_POWER]; }; -static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ +static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f65ab8b014c..a36bb90aef5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; -static void *text_poke_early(void *addr, const void *opcode, size_t len); +void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -522,7 +522,7 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -static void *__init_or_module text_poke_early(void *addr, const void *opcode, +void *__init_or_module text_poke_early(void *addr, const void *opcode, size_t len) { unsigned long flags; @@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) tpp.len = len; atomic_set(&stop_machine_first, 1); wrote_text = 0; - stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); + /* Use __stop_machine() because the caller already got online_cpus. */ + __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); return addr; } +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) + +unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; + +void __init arch_init_ideal_nop5(void) +{ + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; + + /* + * There is no good nop for all x86 archs. + * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); + memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); + break; + case 1: + pr_info("converting mcount calls to 66 66 66 66 90\n"); + memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); + break; + case 2: + pr_info("converting mcount calls to jmp . + 5\n"); + memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); + break; + } + +} +#endif diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa044e1e30a..d2fdb0826df 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, size_t size, int dir) { + dma_addr_t flush_addr; dma_addr_t i, start; unsigned int pages; @@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, (dma_addr + size > dma_dom->aperture_size)) return; + flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; @@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(&dma_dom->domain, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd..3cb482e123d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. * Author: Joerg Roedel <joerg.roedel@amd.com> * Leo Duran <leo.duran@amd.com> * @@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size) return 1UL << shift; } +/* Access to l1 and l2 indexed register spaces */ + +static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) +{ + u32 val; + + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); + pci_read_config_dword(iommu->dev, 0xfc, &val); + return val; +} + +static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) +{ + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); + pci_write_config_dword(iommu->dev, 0xfc, val); + pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); +} + +static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) +{ + u32 val; + + pci_write_config_dword(iommu->dev, 0xf0, address); + pci_read_config_dword(iommu->dev, 0xf4, &val); + return val; +} + +static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) +{ + pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); + pci_write_config_dword(iommu->dev, 0xf4, val); +} + /**************************************************************************** * * AMD IOMMU MMIO register space handling functions @@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; u32 range, misc; + int i, j; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); @@ -632,6 +666,30 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (!is_rd890_iommu(iommu->dev)) + return; + + /* + * Some rd890 systems may not be fully reconfigured by the BIOS, so + * it's necessary for us to store this information so it can be + * reprogrammed on resume + */ + + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, + &iommu->stored_addr_lo); + pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, + &iommu->stored_addr_hi); + + /* Low bit locks writes to configuration space */ + iommu->stored_addr_lo &= ~1; + + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); + + for (i = 0; i < 0x83; i++) + iommu->stored_l2[i] = iommu_read_l2(iommu, i); } /* @@ -649,29 +707,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_entry *e; /* - * First set the recommended feature enable bits from ACPI - * into the IOMMU control registers - */ - h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : - iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - - h->flags & IVHD_FLAG_PASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - - h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - - h->flags & IVHD_FLAG_ISOC_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_ISOC_EN) : - iommu_feature_disable(iommu, CONTROL_ISOC_EN); - - /* - * make IOMMU memory accesses cache coherent + * First save the recommended feature enable bits from ACPI */ - iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + iommu->acpi_flags = h->flags; /* * Done. Now parse the device entries @@ -1116,6 +1154,79 @@ static void init_device_table(void) } } +static void iommu_init_flags(struct amd_iommu *iommu) +{ + iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); +} + +static void iommu_apply_resume_quirks(struct amd_iommu *iommu) +{ + int i, j; + u32 ioc_feature_control; + struct pci_dev *pdev = NULL; + + /* RD890 BIOSes may not have completely reconfigured the iommu */ + if (!is_rd890_iommu(iommu->dev)) + return; + + /* + * First, we need to ensure that the iommu is enabled. This is + * controlled by a register in the northbridge + */ + pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); + + if (!pdev) + return; + + /* Select Northbridge indirect register 0x75 and enable writing */ + pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); + pci_read_config_dword(pdev, 0x64, &ioc_feature_control); + + /* Enable the iommu */ + if (!(ioc_feature_control & 0x1)) + pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); + + pci_dev_put(pdev); + + /* Restore the iommu BAR */ + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, + iommu->stored_addr_lo); + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, + iommu->stored_addr_hi); + + /* Restore the l1 indirect regs for each of the 6 l1s */ + for (i = 0; i < 6; i++) + for (j = 0; j < 0x12; j++) + iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); + + /* Restore the l2 indirect regs */ + for (i = 0; i < 0x83; i++) + iommu_write_l2(iommu, i, iommu->stored_l2[i]); + + /* Lock PCI setup registers */ + pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, + iommu->stored_addr_lo | 1); +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1126,6 +1237,7 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); @@ -1150,6 +1262,11 @@ static void disable_iommus(void) static int amd_iommu_resume(struct sys_device *dev) { + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_apply_resume_quirks(iommu); + /* re-load the hardware */ enable_iommus(); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index e91e042a5c8..377f5db3b8b 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void) continue; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); - aper_enabled = ctl & AMD64_GARTEN; + aper_enabled = ctl & GARTEN; aper_order = (ctl >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; @@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void) continue; ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); - ctl &= ~AMD64_GARTEN; + ctl &= ~GARTEN; write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); } } @@ -505,8 +505,13 @@ out: /* Fix up the north bridges */ for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { - int bus; - int dev_base, dev_limit; + int bus, dev_base, dev_limit; + + /* + * Don't enable translation yet but enable GART IO and CPU + * accesses and set DISTLBWALKPRB since GART table memory is UC. + */ + u32 ctl = DISTLBWALKPRB | aper_order << 1; bus = bus_dev_ranges[i].bus; dev_base = bus_dev_ranges[i].dev_base; @@ -515,10 +520,7 @@ out: if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) continue; - /* Don't enable translation yet. That is done later. - Assume this BIOS didn't initialise the GART so - just overwrite all previous bits */ - write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1); + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); } } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f1efebaf551..5c5b8f3dddb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, old_cfg = old_desc->chip_data; - memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + cfg->vector = old_cfg->vector; + cfg->move_in_progress = old_cfg->move_in_progress; + cpumask_copy(cfg->domain, old_cfg->domain); + cpumask_copy(cfg->old_domain, old_cfg->old_domain); init_copy_irq_2_pin(old_cfg, cfg, node); } -static void free_irq_cfg(struct irq_cfg *old_cfg) +static void free_irq_cfg(struct irq_cfg *cfg) { - kfree(old_cfg); + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); } void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 490dac63c2d..f2f9ac7da25 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; u32 ebx; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3624e8a0f71..f668bb1f7d4 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern void get_cpu_cap(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 994230d4dc4..4f6f679f279 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) return -ENODEV; out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) - return -ENODEV; + if (out_obj->type != ACPI_TYPE_BUFFER) { + ret = -ENODEV; + goto out_free; + } errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) - return -ENODEV; + if (errors) { + ret = -ENODEV; + goto out_free; + } supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) - return -ENODEV; + if (!(supported & 0x1)) { + ret = -ENODEV; + goto out_free; + } out_free: kfree(output.pointer); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 85f69cdeae1..b4389441efb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); c->cpuid_level = cpuid_eax(0); + get_cpu_cap(c); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5e975298fa8..39aaee5c1ab 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) address = (low & MASK_BLKPTR_LO) >> 21; if (!address) break; + address += MCG_XBLK_ADDR; } else ++address; @@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (rdmsr_safe(address, &low, &high)) break; - if (!(high & MASK_VALID_HI)) { - if (block) - continue; - else - break; - } + if (!(high & MASK_VALID_HI)) + continue; if (!(high & MASK_CNTP_HI) || (high & MASK_LOCKED_HI)) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d9368eeda30..169d8804a9f 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -216,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, err = sysfs_add_file_to_group(&sys_dev->kobj, &attr_core_power_limit_count.attr, thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PTS)) + if (cpu_has(c, X86_FEATURE_PTS)) { err = sysfs_add_file_to_group(&sys_dev->kobj, &attr_package_throttle_count.attr, thermal_attr_group.name); @@ -224,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, err = sysfs_add_file_to_group(&sys_dev->kobj, &attr_package_power_limit_count.attr, thermal_attr_group.name); + } return err; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3efdf2870a3..fe73c1844a9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -102,6 +102,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; @@ -530,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -583,7 +584,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -618,7 +619,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -800,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event *event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -void hw_perf_enable(void) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -839,7 +840,14 @@ void hw_perf_enable(void) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -851,7 +859,10 @@ void hw_perf_enable(void) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -952,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. * * The event is added to the group of enabled events * but only if it can be scehduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -969,57 +977,67 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); + __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1076,27 +1094,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; @@ -1109,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1132,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; struct perf_event *event; - struct hw_perf_event *hwc; int idx, handled = 0; u64 val; @@ -1141,11 +1160,18 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* + * Though we deactivated the counter some cpus + * might still deliver spurious interrupts still + * in flight. Catch them: + */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; - hwc = &event->hw; val = x86_perf_event_update(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1161,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1170,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) return handled; } -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_event_do_pending(); - irq_exit(); -} - -void set_perf_event_pending(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -#endif -} - void perf_events_lapic_init(void) { if (!x86_pmu.apic || !x86_pmu_initialized()) @@ -1378,7 +1385,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", @@ -1414,6 +1420,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1427,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1440,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1450,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_pmu_enable(pmu); } /* @@ -1457,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1479,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1569,12 +1566,22 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1594,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } -/* - * callchain support - */ +static struct pmu pmu = { + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} + .event_init = x86_pmu_event_init, -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + +/* + * callchain support + */ static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) @@ -1635,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1646,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1679,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1692,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1721,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c2897b7b4a3..46d58448c3a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ + [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ + [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d..c8f5c088cad 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; int bit, loops; u64 status; - int handled = 0; + int handled; perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); intel_pmu_disable_all(); - intel_pmu_drain_bts_buffer(); + handled = intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); if (!status) { intel_pmu_enable_all(0); - return 0; + return handled; } loops = 0; @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311c..4977f9c400e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) update_debugctlmsr(debugctlmsr); } -static void intel_pmu_drain_bts_buffer(void) +static int intel_pmu_drain_bts_buffer(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void) struct pt_regs regs; if (!event) - return; + return 0; if (!ds) - return; + return 0; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; top = (struct bts_record *)(unsigned long)ds->bts_index; if (top <= at) - return; + return 0; ds->bts_index = ds->bts_buffer_base; @@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) perf_prepare_sample(&header, &data, event, ®s); if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) - return; + return 1; for (; at < top; at++) { data.ip = at->from; @@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) /* There's new data available. */ event->hw.interrupts++; event->pending_kill = POLL_IN; + return 1; } /* @@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, ®s)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index b560db3305b..81400b93e69 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -18,6 +18,8 @@ struct p4_event_bind { unsigned int opcode; /* Event code and ESCR selector */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int escr_emask; /* valid ESCR EventMask bits */ + unsigned int shared; /* event is shared across threads */ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; @@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { [P4_EVENT_TC_DELIVER_MODE] = { .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), + .shared = 1, .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_BPU_FETCH_REQUEST] = { .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_ITLB_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_MEMORY_CANCEL] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MEMORY_COMPLETE] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_LOAD_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_STORE_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MOB_LOAD_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_PAGE_WALK_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_CACHE_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ALLOCATION] = { .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_FSB_DATA_ACTIVITY] = { .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), .cntr = { {0, -1, -1}, {1, -1, -1} }, }, [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_SSE_INPUT_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_64BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_128BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_X87_FP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_TC_MISC] = { .opcode = P4_OPCODE(P4_EVENT_TC_MISC), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_GLOBAL_POWER_EVENTS] = { .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_TC_MS_XFER] = { .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_UOP_QUEUE_WRITES] = { .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RESOURCE_STALL] = { .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_WC_BUFFER] = { .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_B2B_CYCLES] = { .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BNR] = { .opcode = P4_OPCODE(P4_EVENT_BNR), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_SNOOP] = { .opcode = P4_OPCODE(P4_EVENT_SNOOP), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_RESPONSE] = { .opcode = P4_OPCODE(P4_EVENT_RESPONSE), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_FRONT_END_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_EXECUTION_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_REPLAY_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOPS_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOP_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MISPRED_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_X87_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MACHINE_CLEAR] = { .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_COMPLETED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, }; @@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event) return config; } +/* check cpu model specifics */ +static bool p4_event_match_cpu_model(unsigned int event_idx) +{ + /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ + if (event_idx == P4_EVENT_INSTR_COMPLETED) { + if (boot_cpu_data.x86_model != 3 && + boot_cpu_data.x86_model != 4 && + boot_cpu_data.x86_model != 6) + return false; + } + + /* + * For info + * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 + */ + + return true; +} + static int p4_validate_raw_event(struct perf_event *event) { - unsigned int v; + unsigned int v, emask; - /* user data may have out-of-bound event index */ + /* User data may have out-of-bound event index */ v = p4_config_unpack_event(event->attr.config); - if (v >= ARRAY_SIZE(p4_event_bind_map)) { - pr_warning("P4 PMU: Unknown event code: %d\n", v); + if (v >= ARRAY_SIZE(p4_event_bind_map)) + return -EINVAL; + + /* It may be unsupported: */ + if (!p4_event_match_cpu_model(v)) return -EINVAL; + + /* + * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as + * in Architectural Performance Monitoring, it means not + * on _which_ logical cpu to count but rather _when_, ie it + * depends on logical cpu state -- count event if one cpu active, + * none, both or any, so we just allow user to pass any value + * desired. + * + * In turn we always set Tx_OS/Tx_USR bits bound to logical + * cpu without their propagation to another cpu + */ + + /* + * if an event is shared accross the logical threads + * the user needs special permissions to be able to use it + */ + if (p4_event_bind_map[v].shared) { + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } + /* ESCR EventMask bits may be invalid */ + emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; + if (emask & ~p4_event_bind_map[v].escr_emask) + return -EINVAL; + /* - * it may have some screwed PEBS bits + * it may have some invalid PEBS bits */ - if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { - pr_warning("P4 PMU: PEBS are not supported yet\n"); + if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) return -EINVAL; - } + v = p4_config_unpack_metric(event->attr.config); - if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { - pr_warning("P4 PMU: Unknown metric code: %d\n", v); + if (v >= ARRAY_SIZE(p4_pebs_bind_map)) return -EINVAL; - } return 0; } @@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) { + /* + * Clear bits we reserve to be managed by kernel itself + * and never allowed from a user space + */ + event->attr.config &= P4_CONFIG_MASK; + rc = p4_validate_raw_event(event); if (rc) goto out; /* - * We don't control raw events so it's up to the caller - * to pass sane values (and we don't count the thread number - * on HT machine but allow HT-compatible specifics to be - * passed on) - * * Note that for RAW events we allow user to use P4_CCCR_RESERVED * bits since we keep additional info here (for cache events and etc) - * - * XXX: HT wide things should check perf_paranoid_cpu() && - * CAP_SYS_ADMIN */ - event->hw.config |= event->attr.config & - (p4_config_pack_escr(P4_ESCR_MASK_HT) | - p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); - - event->hw.config &= ~P4_CCCR_FORCE_OVF; + event->hw.config |= event->attr.config; } rc = x86_setup_perfctr(event); @@ -660,8 +904,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) for (idx = 0; idx < x86_pmu.num_counters; idx++) { int overflow; - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* catch in-flight IRQs */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; hwc = &event->hw; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 2c77931473f..c7f64e6f537 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { + { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec7cbb..c375c79065f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_PERF_EVENTS -apicinterrupt LOCAL_PENDING_VECTOR \ - perf_pending_interrupt smp_perf_pending_interrupt +#ifdef CONFIG_IRQ_WORK +apicinterrupt IRQ_WORK_VECTOR \ + irq_work_interrupt smp_irq_work_interrupt #endif /* diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cd37469b54e..3afb33f14d2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) return mod_code_status; } - - - -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; - static unsigned char *ftrace_nop_replace(void) { - return ftrace_nop; + return ideal_nop5; } static int @@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - extern const unsigned char ftrace_test_p6nop[]; - extern const unsigned char ftrace_test_nop5[]; - extern const unsigned char ftrace_test_jmp[]; - int faulted = 0; - - /* - * There is no good nop for all x86 archs. - * We will default to using the P6_NOP5, but first we - * will test to make sure that the nop will actually - * work on this CPU. If it faults, we will then - * go to a lesser efficient 5 byte nop. If that fails - * we then just use a jmp as our nop. This isn't the most - * efficient nop, but we can not use a multi part nop - * since we would then risk being preempted in the middle - * of that nop, and if we enabled tracing then, it might - * cause a system crash. - * - * TODO: check the cpuid to determine the best nop. - */ - asm volatile ( - "ftrace_test_jmp:" - "jmp ftrace_test_p6nop\n" - "nop\n" - "nop\n" - "nop\n" /* 2 byte jmp + 3 bytes */ - "ftrace_test_p6nop:" - P6_NOP5 - "jmp 1f\n" - "ftrace_test_nop5:" - ".byte 0x66,0x66,0x66,0x66,0x90\n" - "1:" - ".section .fixup, \"ax\"\n" - "2: movl $1, %0\n" - " jmp ftrace_test_nop5\n" - "3: movl $2, %0\n" - " jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(ftrace_test_p6nop, 2b) - _ASM_EXTABLE(ftrace_test_nop5, 3b) - : "=r"(faulted) : "0" (faulted)); - - switch (faulted) { - case 0: - pr_info("converting mcount calls to 0f 1f 44 00 00\n"); - memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); - break; - case 1: - pr_info("converting mcount calls to 66 66 66 66 90\n"); - memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); - break; - case 2: - pr_info("converting mcount calls to jmp . + 5\n"); - memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); - break; - } - /* The return code is retured via data */ *(unsigned long *)data = 0; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 410fdb3f193..7494999141b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -506,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) { unsigned int irq; - irq = create_irq(); + irq = create_irq_nr(0, -1); if (!irq) return -EINVAL; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32..ff15c9dcc25 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_X: + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + if (x86_len != X86_BREAKPOINT_LEN_X) + return -EINVAL; + + *gen_type = HW_BREAKPOINT_X; *gen_len = sizeof(long); + return 0; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (x86_len) { case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, return -EINVAL; } - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - return 0; } @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { - case X86_BREAKPOINT_LEN_X: - align = sizeof(long) -1; - break; case X86_BREAKPOINT_LEN_1: align = 0; break; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 91fd0c70a18..44edb03fc9e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "PND"); + seq_printf(p, "%*s: ", prec, "IWI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); - seq_printf(p, " Performance pending work\n"); + seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); + seq_printf(p, " IRQ work interrupts\n"); #endif if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); @@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; - sum += irq_stats(cpu)->apic_pending_irqs; + sum += irq_stats(cpu)->apic_irq_work_irqs; #endif if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c new file mode 100644 index 00000000000..ca8f703a1e7 --- /dev/null +++ b/arch/x86/kernel/irq_work.c @@ -0,0 +1,30 @@ +/* + * x86 specific code for irq_work + * + * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/irq_work.h> +#include <linux/hardirq.h> +#include <asm/apic.h> + +void smp_irq_work_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_irq_work_irqs); + irq_work_run(); + irq_exit(); +} + +void arch_irq_work_raise(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!cpu_has_apic) + return; + + apic->send_IPI_self(IRQ_WORK_VECTOR); + apic_wait_icr_idle(); +#endif +} diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 990ae7cfc57..713969b9266 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -224,9 +224,9 @@ static void __init apic_intr_init(void) alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - /* Performance monitoring interrupts: */ -# ifdef CONFIG_PERF_EVENTS - alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); + /* IRQ work interrupts: */ +# ifdef CONFIG_IRQ_WORK + alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); # endif #endif diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c new file mode 100644 index 00000000000..961b6b30ba9 --- /dev/null +++ b/arch/x86/kernel/jump_label.c @@ -0,0 +1,50 @@ +/* + * jump label x86 support + * + * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> + * + */ +#include <linux/jump_label.h> +#include <linux/memory.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/cpu.h> +#include <asm/kprobes.h> +#include <asm/alternative.h> + +#ifdef HAVE_JUMP_LABEL + +union jump_code_union { + char code[JUMP_LABEL_NOP_SIZE]; + struct { + char jump; + int offset; + } __attribute__((packed)); +}; + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + union jump_code_union code; + + if (type == JUMP_LABEL_ENABLE) { + code.jump = 0xe9; + code.offset = entry->target - + (entry->code + JUMP_LABEL_NOP_SIZE); + } else + memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); + get_online_cpus(); + mutex_lock(&text_mutex); + text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + mutex_unlock(&text_mutex); + put_online_cpus(); +} + +void arch_jump_label_text_poke_early(jump_label_t addr) +{ + text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); +} + +#endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 770ebfb349e..1cbd54c0df9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) return 0; } -/* Dummy buffers for kallsyms_lookup */ -static char __dummy_buf[KSYM_NAME_LEN]; - /* Check if paddr is at an instruction boundary */ static int __kprobes can_probe(unsigned long paddr) { @@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr) struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) return 0; /* Decode instructions */ @@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, *(unsigned long *)addr = val; } -void __kprobes kprobes_optinsn_template_holder(void) +static void __used __kprobes kprobes_optinsn_template_holder(void) { asm volatile ( ".global optprobe_template_entry\n" @@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether the address range is reserved */ if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1)) + alternatives_text_reserved(src, src + len - 1) || + jump_label_text_reserved(src, src + len - 1)) return -EBUSY; return len; @@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr) unsigned long addr, size = 0, offset = 0; struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - /* Dummy buffers for lookup_symbol_attrs */ - static char __dummy_buf[KSYM_NAME_LEN]; /* Lookup symbol including addr */ - if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) + if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) return 0; /* Check there is enough space for a relative jump. */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d750..8f295609173 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,13 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + /* make jump label nops */ + jump_label_apply_nops(me); + + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 67e5665ce42..c562207b1b3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -607,7 +607,7 @@ static void gart_fixup_northbridges(struct sys_device *dev) * Don't enable translations just yet. That is the next * step. Restore the pre-suspend aperture settings. */ - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); + gart_set_size_and_enable(dev, aperture_order); pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); } } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 77eea0362a4..df770ad99b3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,7 @@ #include <asm/numa_64.h> #endif #include <asm/mce.h> +#include <asm/alternative.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p) { int acpi = 0; int k8 = 0; + unsigned long flags; #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); @@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.banner(); mcheck_init(); + + local_irq_save(flags); + arch_init_ideal_nop5(); + local_irq_restore(flags); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4496315eb22..0c40d8b7241 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,10 +104,14 @@ int __init notsc_setup(char *str) __setup("notsc", notsc_setup); +static int no_sched_irq_time; + static int __init tsc_setup(char *str) { if (!strcmp(str, "reliable")) tsc_clocksource_reliable = 1; + if (!strncmp(str, "noirqtime", 9)) + no_sched_irq_time = 1; return 1; } @@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason) if (!tsc_unstable) { tsc_unstable = 1; sched_clock_stable = 0; + disable_sched_clock_irqtime(); printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) @@ -929,6 +934,9 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; + if (!no_sched_irq_time) + enable_sched_clock_irqtime(); + lpj = ((u64)tsc_khz * 1000); do_div(lpj, HZ); lpj_fine = lpj; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bc5b9b8d4a3..8a3f9f64f86 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -766,7 +766,6 @@ static void init_vmcb(struct vcpu_svm *svm) control->iopm_base_pa = iopm_base; control->msrpm_base_pa = __pa(svm->msrpm); - control->tsc_offset = 0; control->int_ctl = V_INTR_MASKING_MASK; init_seg(&save->es); @@ -902,6 +901,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; init_vmcb(svm); + svm->vmcb->control.tsc_offset = 0-native_read_tsc(); err = fx_init(&svm->vcpu); if (err) @@ -3163,8 +3163,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) sync_lapic_to_cr8(vcpu); save_host_msrs(vcpu); - fs_selector = kvm_read_fs(); - gs_selector = kvm_read_gs(); + savesegment(fs, fs_selector); + savesegment(gs, gs_selector); ldt_selector = kvm_read_ldt(); svm->vmcb->save.cr2 = vcpu->arch.cr2; /* required for live migration with NPT */ @@ -3251,10 +3251,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; - kvm_load_fs(fs_selector); - kvm_load_gs(gs_selector); - kvm_load_ldt(ldt_selector); load_host_msrs(vcpu); + loadsegment(fs, fs_selector); +#ifdef CONFIG_X86_64 + load_gs_index(gs_selector); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); +#else + loadsegment(gs, gs_selector); +#endif + kvm_load_ldt(ldt_selector); reload_tss(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 49b25eee25a..7bddfab1201 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -803,7 +803,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) */ vmx->host_state.ldt_sel = kvm_read_ldt(); vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; - vmx->host_state.fs_sel = kvm_read_fs(); + savesegment(fs, vmx->host_state.fs_sel); if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); vmx->host_state.fs_reload_needed = 0; @@ -811,7 +811,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_write16(HOST_FS_SELECTOR, 0); vmx->host_state.fs_reload_needed = 1; } - vmx->host_state.gs_sel = kvm_read_gs(); + savesegment(gs, vmx->host_state.gs_sel); if (!(vmx->host_state.gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { @@ -841,27 +841,21 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) static void __vmx_load_host_state(struct vcpu_vmx *vmx) { - unsigned long flags; - if (!vmx->host_state.loaded) return; ++vmx->vcpu.stat.host_state_reload; vmx->host_state.loaded = 0; if (vmx->host_state.fs_reload_needed) - kvm_load_fs(vmx->host_state.fs_sel); + loadsegment(fs, vmx->host_state.fs_sel); if (vmx->host_state.gs_ldt_reload_needed) { kvm_load_ldt(vmx->host_state.ldt_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. - */ - local_irq_save(flags); - kvm_load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); + load_gs_index(vmx->host_state.gs_sel); + wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); +#else + loadsegment(gs, vmx->host_state.gs_sel); #endif - local_irq_restore(flags); } reload_tss(); #ifdef CONFIG_X86_64 @@ -2589,8 +2583,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ - vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ + vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ + vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9257510b483..9d5f5584845 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc) } /* - * For a single GDT entry which changes, we do the lazy thing: alter our GDT, - * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. + * For a single GDT entry which changes, we simply change our copy and + * then tell the host about it. */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) @@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, } /* - * OK, I lied. There are three "thread local storage" GDT entries which change + * There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. + * __thread variables). As an optimization, we have a hypercall + * specifically for this case. + * + * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall + * which took a range of entries? */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4c4508e8a20..a24c6cfdccc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; + WARN_ON_ONCE(in_nmi()); + /* * Synchronize this task's top level page-table * with the 'reference' page table. @@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; + WARN_ON_ONCE(in_nmi()); + /* * Copy kernel mappings over when needed. This can also * happen within a race in page table update. In the later diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index b3b531a4f8e..d87dd6d042d 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, if (!pte) return false; + WARN_ON_ONCE(in_nmi()); + if (error_code & 2) kmemcheck_access(regs, address, KMEMCHECK_WRITE); else diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index f9897f7a9ef..9c0d0d399c3 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -420,9 +420,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; } - for_each_node_mask(i, nodes_parsed) - e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, - nodes[i].end >> PAGE_SHIFT); + for (i = 0; i < num_node_memblks; i++) + e820_register_active_regions(memblk_nodeid[i], + node_memblk_range[i].start >> PAGE_SHIFT, + node_memblk_range[i].end >> PAGE_SHIFT); + /* for out of order entries in SRAT */ sort_node_map(); if (!nodes_cover_memory(nodes)) { diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 3855096c59b..2d49d4e19a3 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -14,6 +14,7 @@ #include <asm/ptrace.h> #include <asm/uaccess.h> #include <asm/stacktrace.h> +#include <linux/compat.h> static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) @@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack, }; -struct frame_head { - struct frame_head *bp; - unsigned long ret; -} __attribute__((packed)); - -static struct frame_head *dump_user_backtrace(struct frame_head *head) +#ifdef CONFIG_COMPAT +static struct stack_frame_ia32 * +dump_user_backtrace_32(struct stack_frame_ia32 *head) { - struct frame_head bufhead[2]; + struct stack_frame_ia32 bufhead[2]; + struct stack_frame_ia32 *fp; /* Also check accessibility of one struct frame_head beyond */ if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) @@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head) if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; - oprofile_add_trace(bufhead[0].ret); + fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); + + oprofile_add_trace(bufhead[0].return_address); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= fp) + return NULL; + + return fp; +} + +static inline int +x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) +{ + struct stack_frame_ia32 *head; + + /* User process is 32-bit */ + if (!current || !test_thread_flag(TIF_IA32)) + return 0; + + head = (struct stack_frame_ia32 *) regs->bp; + while (depth-- && head) + head = dump_user_backtrace_32(head); + + return 1; +} + +#else +static inline int +x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) +{ + return 0; +} +#endif /* CONFIG_COMPAT */ + +static struct stack_frame *dump_user_backtrace(struct stack_frame *head) +{ + struct stack_frame bufhead[2]; + + /* Also check accessibility of one struct stack_frame beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) + return NULL; + + oprofile_add_trace(bufhead[0].return_address); /* frame pointers should strictly progress back up the stack * (towards higher addresses) */ - if (head >= bufhead[0].bp) + if (head >= bufhead[0].next_frame) return NULL; - return bufhead[0].bp; + return bufhead[0].next_frame; } void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { - struct frame_head *head = (struct frame_head *)frame_pointer(regs); + struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); if (!user_mode_vm(regs)) { unsigned long stack = kernel_stack_pointer(regs); @@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) return; } + if (x86_backtrace_32(regs, depth)) + return; + while (depth-- && head) head = dump_user_backtrace(head); } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cfe4faabb0f..bd1489c3ce0 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -671,7 +671,10 @@ static int __init ppro_init(char **cpu_type) case 14: *cpu_type = "i386/core"; break; - case 15: case 23: + case 0x0f: + case 0x16: + case 0x17: + case 0x1d: *cpu_type = "i386/core_2"; break; case 0x1a: @@ -692,9 +695,6 @@ static int __init ppro_init(char **cpu_type) return 1; } -/* in order to get sysfs right */ -static int using_nmi; - int __init op_nmi_init(struct oprofile_operations *ops) { __u8 vendor = boot_cpu_data.x86_vendor; @@ -702,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) char *cpu_type = NULL; int ret = 0; - using_nmi = 0; - if (!cpu_has_apic) return -ENODEV; @@ -787,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (ret) return ret; - using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } void op_nmi_exit(void) { - if (using_nmi) - exit_sysfs(); + exit_sysfs(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 1a5353a753f..b2bb5aa3b05 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -489,8 +489,9 @@ static void xen_hvm_setup_cpu_clockevents(void) __init void xen_hvm_init_time_ops(void) { /* vector callback is needed otherwise we cannot receive interrupts - * on cpu > 0 */ - if (!xen_have_vector_callback && num_present_cpus() > 1) + * on cpu > 0 and at this point we don't know how many cpus are + * available */ + if (!xen_have_vector_callback) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { printk(KERN_INFO "Xen doesn't support pvclock on HVM," |