diff options
Diffstat (limited to 'arch')
105 files changed, 2347 insertions, 2040 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7536b9cbb07..8090cad0dd5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -346,7 +346,6 @@ config ARCH_GEMINI config ARCH_PRIMA2 bool "CSR SiRFSoC PRIMA2 ARM Cortex A9 Platform" select CPU_V7 - select GENERIC_TIME select NO_IOPORT select GENERIC_CLOCKEVENTS select CLKDEV_LOOKUP @@ -520,7 +519,6 @@ config ARCH_LPC32XX select ARM_AMBA select USB_ARCH_HAS_OHCI select CLKDEV_LOOKUP - select GENERIC_TIME select GENERIC_CLOCKEVENTS help Support for the NXP LPC32XX family of processors @@ -599,7 +597,6 @@ config ARCH_TEGRA bool "NVIDIA Tegra" select CLKDEV_LOOKUP select CLKSRC_MMIO - select GENERIC_TIME select GENERIC_CLOCKEVENTS select GENERIC_GPIO select HAVE_CLK @@ -727,7 +724,6 @@ config ARCH_S3C64XX select ARCH_REQUIRE_GPIOLIB select SAMSUNG_CLKSRC select SAMSUNG_IRQ_VIC_TIMER - select SAMSUNG_IRQ_UART select S3C_GPIO_TRACK select S3C_GPIO_PULL_UPDOWN select S3C_GPIO_CFG_S3C24XX @@ -915,7 +911,6 @@ config ARCH_VT8500 config ARCH_ZYNQ bool "Xilinx Zynq ARM Cortex A9 Platform" select CPU_V7 - select GENERIC_TIME select GENERIC_CLOCKEVENTS select CLKDEV_LOOKUP select ARM_GIC diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index 666b278e56d..bdbb3f74f0f 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -33,7 +33,7 @@ #include <asm/mach/irq.h> #include <asm/hardware/gic.h> -static DEFINE_SPINLOCK(irq_controller_lock); +static DEFINE_RAW_SPINLOCK(irq_controller_lock); /* Address of GIC 0 CPU interface */ void __iomem *gic_cpu_base_addr __read_mostly; @@ -82,30 +82,30 @@ static void gic_mask_irq(struct irq_data *d) { u32 mask = 1 << (d->irq % 32); - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4); if (gic_arch_extn.irq_mask) gic_arch_extn.irq_mask(d); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); } static void gic_unmask_irq(struct irq_data *d) { 
u32 mask = 1 << (d->irq % 32); - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); if (gic_arch_extn.irq_unmask) gic_arch_extn.irq_unmask(d); writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); } static void gic_eoi_irq(struct irq_data *d) { if (gic_arch_extn.irq_eoi) { - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); gic_arch_extn.irq_eoi(d); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); } writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); @@ -129,7 +129,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING) return -EINVAL; - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); if (gic_arch_extn.irq_set_type) gic_arch_extn.irq_set_type(d, type); @@ -154,7 +154,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) if (enabled) writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); return 0; } @@ -182,10 +182,10 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, mask = 0xff << shift; bit = 1 << (cpu_logical_map(cpu) + shift); - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); return IRQ_SET_MASK_OK; } @@ -215,9 +215,9 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) chained_irq_enter(chip, desc); - spin_lock(&irq_controller_lock); + raw_spin_lock(&irq_controller_lock); status = readl_relaxed(chip_data->cpu_base + GIC_CPU_INTACK); - spin_unlock(&irq_controller_lock); + raw_spin_unlock(&irq_controller_lock); gic_irq = (status & 0x3ff); if (gic_irq == 
1023) diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index 628670e9d7c..69a5b0b6455 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -34,18 +34,18 @@ #define DMA_MODE_CASCADE 0xc0 #define DMA_AUTOINIT 0x10 -extern spinlock_t dma_spin_lock; +extern raw_spinlock_t dma_spin_lock; static inline unsigned long claim_dma_lock(void) { unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); + raw_spin_lock_irqsave(&dma_spin_lock, flags); return flags; } static inline void release_dma_lock(unsigned long flags) { - spin_unlock_irqrestore(&dma_spin_lock, flags); + raw_spin_unlock_irqrestore(&dma_spin_lock, flags); } /* Clear the 'DMA Pointer Flip Flop'. diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index b4ffe9d5b52..14965658a92 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -6,7 +6,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID unsigned int id; - spinlock_t id_lock; + raw_spinlock_t id_lock; #endif unsigned int kvm_seq; } mm_context_t; @@ -16,7 +16,7 @@ typedef struct { /* init_mm.context.id_lock should be initialized. 
*/ #define INIT_MM_CONTEXT(name) \ - .context.id_lock = __SPIN_LOCK_UNLOCKED(name.context.id_lock), + .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), #else #define ASID(mm) (0) #endif diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index 2c4a185f92c..7b829d9663b 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -23,7 +23,7 @@ #include <asm/mach/dma.h> -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_RAW_SPINLOCK(dma_spin_lock); EXPORT_SYMBOL(dma_spin_lock); static dma_t *dma_chan[MAX_DMA_CHANNELS]; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 854ce33715f..94f34a6c861 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -566,7 +566,7 @@ static void percpu_timer_stop(void) } #endif -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_RAW_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() @@ -575,10 +575,10 @@ static void ipi_cpu_stop(unsigned int cpu) { if (system_state == SYSTEM_BOOTING || system_state == SYSTEM_RUNNING) { - spin_lock(&stop_lock); + raw_spin_lock(&stop_lock); printk(KERN_CRIT "CPU%u: stopping\n", cpu); dump_stack(); - spin_unlock(&stop_lock); + raw_spin_unlock(&stop_lock); } set_cpu_online(cpu, false); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 7f5b99eb2c5..99a57270250 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -257,7 +257,7 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt return ret; } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. 
@@ -269,7 +269,7 @@ void die(const char *str, struct pt_regs *regs, int err) oops_enter(); - spin_lock_irq(&die_lock); + raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); if (!user_mode(regs)) @@ -281,7 +281,7 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); + raw_spin_unlock_irq(&die_lock); oops_exit(); if (in_interrupt()) @@ -324,24 +324,24 @@ int is_valid_bugaddr(unsigned long pc) #endif static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_RAW_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + raw_spin_lock_irqsave(&undef_lock, flags); list_add(&hook->node, &undef_hook); - spin_unlock_irqrestore(&undef_lock, flags); + raw_spin_unlock_irqrestore(&undef_lock, flags); } void unregister_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + raw_spin_lock_irqsave(&undef_lock, flags); list_del(&hook->node); - spin_unlock_irqrestore(&undef_lock, flags); + raw_spin_unlock_irqrestore(&undef_lock, flags); } static int call_undef_hook(struct pt_regs *regs, unsigned int instr) @@ -350,12 +350,12 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) unsigned long flags; int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; - spin_lock_irqsave(&undef_lock, flags); + raw_spin_lock_irqsave(&undef_lock, flags); list_for_each_entry(hook, &undef_hook, node) if ((instr & hook->instr_mask) == hook->instr_val && (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val) fn = hook->fn; - spin_unlock_irqrestore(&undef_lock, flags); + raw_spin_unlock_irqrestore(&undef_lock, flags); return fn ? 
fn(regs, instr) : 1; } diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h index 15d54981674..e3d6ccac216 100644 --- a/arch/arm/mach-footbridge/include/mach/hardware.h +++ b/arch/arm/mach-footbridge/include/mach/hardware.h @@ -93,7 +93,7 @@ #define CPLD_FLASH_WR_ENABLE 1 #ifndef __ASSEMBLY__ -extern spinlock_t nw_gpio_lock; +extern raw_spinlock_t nw_gpio_lock; extern void nw_gpio_modify_op(unsigned int mask, unsigned int set); extern void nw_gpio_modify_io(unsigned int mask, unsigned int in); extern unsigned int nw_gpio_read(void); diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c index 4cbc2e65ce3..0f7aeff486c 100644 --- a/arch/arm/mach-footbridge/netwinder-hw.c +++ b/arch/arm/mach-footbridge/netwinder-hw.c @@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int val) /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(nw_gpio_lock); +DEFINE_RAW_SPINLOCK(nw_gpio_lock); EXPORT_SYMBOL(nw_gpio_lock); static unsigned int current_gpio_op; @@ -327,9 +327,9 @@ static inline void wb977_init_gpio(void) /* * Set Group1/Group2 outputs */ - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); } /* @@ -390,9 +390,9 @@ static void __init cpld_init(void) { unsigned long flags; - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); nw_cpld_modify(-1, CPLD_UNMUTE | CPLD_7111_DISABLE); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); } static unsigned char rwa_unlock[] __initdata = @@ -616,9 +616,9 @@ static int __init nw_hw_init(void) cpld_init(); rwa010_init(); - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); 
nw_gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); } return 0; } diff --git a/arch/arm/mach-footbridge/netwinder-leds.c b/arch/arm/mach-footbridge/netwinder-leds.c index 00269fe0be8..e57102e871f 100644 --- a/arch/arm/mach-footbridge/netwinder-leds.c +++ b/arch/arm/mach-footbridge/netwinder-leds.c @@ -31,13 +31,13 @@ static char led_state; static char hw_led_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_RAW_SPINLOCK(leds_lock); static void netwinder_leds_event(led_event_t evt) { unsigned long flags; - spin_lock_irqsave(&leds_lock, flags); + raw_spin_lock_irqsave(&leds_lock, flags); switch (evt) { case led_start: @@ -117,12 +117,12 @@ static void netwinder_leds_event(led_event_t evt) break; } - spin_unlock_irqrestore(&leds_lock, flags); + raw_spin_unlock_irqrestore(&leds_lock, flags); if (led_state & LED_STATE_ENABLED) { - spin_lock_irqsave(&nw_gpio_lock, flags); + raw_spin_lock_irqsave(&nw_gpio_lock, flags); nw_gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state); - spin_unlock_irqrestore(&nw_gpio_lock, flags); + raw_spin_unlock_irqrestore(&nw_gpio_lock, flags); } } diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 82ebc8d772d..4b38e13667a 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -209,7 +209,7 @@ static struct amba_pl010_data integrator_uart_data = { #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_CTRL) -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_RAW_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. 
@@ -221,10 +221,10 @@ void cm_control(u32 mask, u32 set) unsigned long flags; u32 val; - spin_lock_irqsave(&cm_lock, flags); + raw_spin_lock_irqsave(&cm_lock, flags); val = readl(CM_CTRL) & ~mask; writel(val | set, CM_CTRL); - spin_unlock_irqrestore(&cm_lock, flags); + raw_spin_unlock_irqrestore(&cm_lock, flags); } EXPORT_SYMBOL(cm_control); diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c index 11b86e5b71c..b4d8f8b8a08 100644 --- a/arch/arm/mach-integrator/pci_v3.c +++ b/arch/arm/mach-integrator/pci_v3.c @@ -163,7 +163,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_RAW_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M @@ -284,7 +284,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where, unsigned long flags; u32 v; - spin_lock_irqsave(&v3_lock, flags); + raw_spin_lock_irqsave(&v3_lock, flags); addr = v3_open_config_window(bus, devfn, where); switch (size) { @@ -302,7 +302,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where, } v3_close_config_window(); - spin_unlock_irqrestore(&v3_lock, flags); + raw_spin_unlock_irqrestore(&v3_lock, flags); *val = v; return PCIBIOS_SUCCESSFUL; @@ -314,7 +314,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where, unsigned long addr; unsigned long flags; - spin_lock_irqsave(&v3_lock, flags); + raw_spin_lock_irqsave(&v3_lock, flags); addr = v3_open_config_window(bus, devfn, where); switch (size) { @@ -335,7 +335,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where, } v3_close_config_window(); - spin_unlock_irqrestore(&v3_lock, flags); + raw_spin_unlock_irqrestore(&v3_lock, flags); return PCIBIOS_SUCCESSFUL; } @@ -513,7 +513,7 @@ void __init pci_v3_preinit(void) hook_fault_code(8, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch"); hook_fault_code(10, v3_pci_fault, SIGBUS, 0, "external abort on 
non-linefetch"); - spin_lock_irqsave(&v3_lock, flags); + raw_spin_lock_irqsave(&v3_lock, flags); /* * Unlock V3 registers, but only if they were previously locked. @@ -586,7 +586,7 @@ void __init pci_v3_preinit(void) printk(KERN_ERR "PCI: unable to grab PCI error " "interrupt: %d\n", ret); - spin_unlock_irqrestore(&v3_lock, flags); + raw_spin_unlock_irqrestore(&v3_lock, flags); } void __init pci_v3_postinit(void) diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 85245e48099..f72a3a893c4 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -54,7 +54,7 @@ unsigned long ixp4xx_pci_reg_base = 0; * these transactions are atomic or we will end up * with corrupt data on the bus or in a driver. */ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space @@ -62,10 +62,10 @@ static DEFINE_SPINLOCK(ixp4xx_pci_lock); static void crp_read(u32 ad_cbe, u32 *data) { unsigned long flags; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_CRP_AD_CBE = ad_cbe; *data = *PCI_CRP_RDATA; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); } /* @@ -74,10 +74,10 @@ static void crp_read(u32 ad_cbe, u32 *data) static void crp_write(u32 ad_cbe, u32 data) { unsigned long flags; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_CRP_AD_CBE = CRP_AD_CBE_WRITE | ad_cbe; *PCI_CRP_WDATA = data; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); } static inline int check_master_abort(void) @@ -101,7 +101,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data) int retval = 0; int i; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -118,7 +118,7 @@ int ixp4xx_pci_read_errata(u32 addr, 
u32 cmd, u32* data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } @@ -127,7 +127,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data) unsigned long flags; int retval = 0; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -140,7 +140,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } @@ -149,7 +149,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data) unsigned long flags; int retval = 0; - spin_lock_irqsave(&ixp4xx_pci_lock, flags); + raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags); *PCI_NP_AD = addr; @@ -162,7 +162,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data) if(check_master_abort()) retval = 1; - spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); + raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags); return retval; } diff --git a/arch/arm/mach-s3c64xx/dev-uart.c b/arch/arm/mach-s3c64xx/dev-uart.c index f797f748b99..c681b99eda0 100644 --- a/arch/arm/mach-s3c64xx/dev-uart.c +++ b/arch/arm/mach-s3c64xx/dev-uart.c @@ -37,21 +37,10 @@ static struct resource s3c64xx_uart0_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S3CUART_RX0, - .end = IRQ_S3CUART_RX0, + .start = IRQ_UART0, + .end = IRQ_UART0, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = IRQ_S3CUART_TX0, - .end = IRQ_S3CUART_TX0, - .flags = IORESOURCE_IRQ, - - }, - [3] = { - .start = IRQ_S3CUART_ERR0, - .end = IRQ_S3CUART_ERR0, - .flags = IORESOURCE_IRQ, - } }; static struct resource s3c64xx_uart1_resource[] = { @@ -61,19 +50,8 @@ static struct resource s3c64xx_uart1_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S3CUART_RX1, - .end = IRQ_S3CUART_RX1, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = 
IRQ_S3CUART_TX1, - .end = IRQ_S3CUART_TX1, - .flags = IORESOURCE_IRQ, - - }, - [3] = { - .start = IRQ_S3CUART_ERR1, - .end = IRQ_S3CUART_ERR1, + .start = IRQ_UART1, + .end = IRQ_UART1, .flags = IORESOURCE_IRQ, }, }; @@ -85,19 +63,8 @@ static struct resource s3c6xx_uart2_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S3CUART_RX2, - .end = IRQ_S3CUART_RX2, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S3CUART_TX2, - .end = IRQ_S3CUART_TX2, - .flags = IORESOURCE_IRQ, - - }, - [3] = { - .start = IRQ_S3CUART_ERR2, - .end = IRQ_S3CUART_ERR2, + .start = IRQ_UART2, + .end = IRQ_UART2, .flags = IORESOURCE_IRQ, }, }; @@ -109,19 +76,8 @@ static struct resource s3c64xx_uart3_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S3CUART_RX3, - .end = IRQ_S3CUART_RX3, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S3CUART_TX3, - .end = IRQ_S3CUART_TX3, - .flags = IORESOURCE_IRQ, - - }, - [3] = { - .start = IRQ_S3CUART_ERR3, - .end = IRQ_S3CUART_ERR3, + .start = IRQ_UART3, + .end = IRQ_UART3, .flags = IORESOURCE_IRQ, }, }; diff --git a/arch/arm/mach-s3c64xx/include/mach/irqs.h b/arch/arm/mach-s3c64xx/include/mach/irqs.h index c026f67a80d..443f85b3c20 100644 --- a/arch/arm/mach-s3c64xx/include/mach/irqs.h +++ b/arch/arm/mach-s3c64xx/include/mach/irqs.h @@ -27,36 +27,6 @@ #define IRQ_VIC0_BASE S3C_IRQ(0) #define IRQ_VIC1_BASE S3C_IRQ(32) -/* UART interrupts, each UART has 4 intterupts per channel so - * use the space between the ISA and S3C main interrupts. Note, these - * are not in the same order as the S3C24XX series! 
*/ - -#define IRQ_S3CUART_BASE0 (16) -#define IRQ_S3CUART_BASE1 (20) -#define IRQ_S3CUART_BASE2 (24) -#define IRQ_S3CUART_BASE3 (28) - -#define UART_IRQ_RXD (0) -#define UART_IRQ_ERR (1) -#define UART_IRQ_TXD (2) -#define UART_IRQ_MODEM (3) - -#define IRQ_S3CUART_RX0 (IRQ_S3CUART_BASE0 + UART_IRQ_RXD) -#define IRQ_S3CUART_TX0 (IRQ_S3CUART_BASE0 + UART_IRQ_TXD) -#define IRQ_S3CUART_ERR0 (IRQ_S3CUART_BASE0 + UART_IRQ_ERR) - -#define IRQ_S3CUART_RX1 (IRQ_S3CUART_BASE1 + UART_IRQ_RXD) -#define IRQ_S3CUART_TX1 (IRQ_S3CUART_BASE1 + UART_IRQ_TXD) -#define IRQ_S3CUART_ERR1 (IRQ_S3CUART_BASE1 + UART_IRQ_ERR) - -#define IRQ_S3CUART_RX2 (IRQ_S3CUART_BASE2 + UART_IRQ_RXD) -#define IRQ_S3CUART_TX2 (IRQ_S3CUART_BASE2 + UART_IRQ_TXD) -#define IRQ_S3CUART_ERR2 (IRQ_S3CUART_BASE2 + UART_IRQ_ERR) - -#define IRQ_S3CUART_RX3 (IRQ_S3CUART_BASE3 + UART_IRQ_RXD) -#define IRQ_S3CUART_TX3 (IRQ_S3CUART_BASE3 + UART_IRQ_TXD) -#define IRQ_S3CUART_ERR3 (IRQ_S3CUART_BASE3 + UART_IRQ_ERR) - /* VIC based IRQs */ #define S3C64XX_IRQ_VIC0(x) (IRQ_VIC0_BASE + (x)) diff --git a/arch/arm/mach-s3c64xx/irq.c b/arch/arm/mach-s3c64xx/irq.c index 75d9a0e4919..b07357e9495 100644 --- a/arch/arm/mach-s3c64xx/irq.c +++ b/arch/arm/mach-s3c64xx/irq.c @@ -25,29 +25,6 @@ #include <plat/irq-uart.h> #include <plat/cpu.h> -static struct s3c_uart_irq uart_irqs[] = { - [0] = { - .regs = S3C_VA_UART0, - .base_irq = IRQ_S3CUART_BASE0, - .parent_irq = IRQ_UART0, - }, - [1] = { - .regs = S3C_VA_UART1, - .base_irq = IRQ_S3CUART_BASE1, - .parent_irq = IRQ_UART1, - }, - [2] = { - .regs = S3C_VA_UART2, - .base_irq = IRQ_S3CUART_BASE2, - .parent_irq = IRQ_UART2, - }, - [3] = { - .regs = S3C_VA_UART3, - .base_irq = IRQ_S3CUART_BASE3, - .parent_irq = IRQ_UART3, - }, -}; - /* setup the sources the vic should advertise resume for, even though it * is not doing the wake (set_irq_wake needs to be valid) */ #define IRQ_VIC0_RESUME (1 << (IRQ_RTC_TIC - IRQ_VIC0_BASE)) @@ -67,6 +44,4 @@ void __init s3c64xx_init_irq(u32 vic0_valid, u32 
vic1_valid) /* add the timer sub-irqs */ s3c_init_vic_timer_irq(5, IRQ_TIMER0); - - s3c_init_uart_irqs(uart_irqs, ARRAY_SIZE(uart_irqs)); } diff --git a/arch/arm/mach-shark/leds.c b/arch/arm/mach-shark/leds.c index c9e32de4adf..ccd49189bbd 100644 --- a/arch/arm/mach-shark/leds.c +++ b/arch/arm/mach-shark/leds.c @@ -36,7 +36,7 @@ static char led_state; static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_RAW_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); @@ -52,7 +52,7 @@ static void sequoia_leds_event(led_event_t evt) { unsigned long flags; - spin_lock_irqsave(&leds_lock, flags); + raw_spin_lock_irqsave(&leds_lock, flags); hw_led_state = sequoia_read(0x09); @@ -144,7 +144,7 @@ static void sequoia_leds_event(led_event_t evt) if (led_state & LED_STATE_ENABLED) sequoia_write(hw_led_state,0x09); - spin_unlock_irqrestore(&leds_lock, flags); + raw_spin_unlock_irqrestore(&leds_lock, flags); } static int __init leds_init(void) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 3f9b9980478..8ac9e9f8479 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -29,7 +29,7 @@ #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; -static DEFINE_SPINLOCK(l2x0_lock); +static DEFINE_RAW_SPINLOCK(l2x0_lock); static uint32_t l2x0_way_mask; /* Bitmask of active ways */ static uint32_t l2x0_size; @@ -126,9 +126,9 @@ static void l2x0_cache_sync(void) { unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void __l2x0_flush_all(void) @@ -145,9 +145,9 @@ static void l2x0_flush_all(void) unsigned long flags; /* clean all ways */ - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); __l2x0_flush_all(); - spin_unlock_irqrestore(&l2x0_lock, flags); + 
raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_clean_all(void) @@ -155,11 +155,11 @@ static void l2x0_clean_all(void) unsigned long flags; /* clean all ways */ - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_inv_all(void) @@ -167,13 +167,13 @@ static void l2x0_inv_all(void) unsigned long flags; /* invalidate all ways */ - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); /* Invalidating when L2 is enabled is a nono */ BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_inv_range(unsigned long start, unsigned long end) @@ -181,7 +181,7 @@ static void l2x0_inv_range(unsigned long start, unsigned long end) void __iomem *base = l2x0_base; unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); if (start & (CACHE_LINE_SIZE - 1)) { start &= ~(CACHE_LINE_SIZE - 1); debug_writel(0x03); @@ -206,13 +206,13 @@ static void l2x0_inv_range(unsigned long start, unsigned long end) } if (blk_end < end) { - spin_unlock_irqrestore(&l2x0_lock, flags); - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); } } cache_wait(base + L2X0_INV_LINE_PA, 1); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_clean_range(unsigned long start, unsigned long end) @@ -225,7 +225,7 @@ static void l2x0_clean_range(unsigned long start, unsigned long 
end) return; } - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); @@ -236,13 +236,13 @@ static void l2x0_clean_range(unsigned long start, unsigned long end) } if (blk_end < end) { - spin_unlock_irqrestore(&l2x0_lock, flags); - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); } } cache_wait(base + L2X0_CLEAN_LINE_PA, 1); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_flush_range(unsigned long start, unsigned long end) @@ -255,7 +255,7 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) return; } - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); @@ -268,24 +268,24 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) debug_writel(0x00); if (blk_end < end) { - spin_unlock_irqrestore(&l2x0_lock, flags); - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); } } cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_disable(void) { unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); __l2x0_flush_all(); writel_relaxed(0, l2x0_base + L2X0_CTRL); dsb(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } static void l2x0_unlock(__u32 cache_id) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index b0ee9ba3cfa..93aac068da9 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -16,7 
+16,7 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> -static DEFINE_SPINLOCK(cpu_asid_lock); +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); unsigned int cpu_last_asid = ASID_FIRST_VERSION; #ifdef CONFIG_SMP DEFINE_PER_CPU(struct mm_struct *, current_mm); @@ -31,7 +31,7 @@ DEFINE_PER_CPU(struct mm_struct *, current_mm); void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mm->context.id = 0; - spin_lock_init(&mm->context.id_lock); + raw_spin_lock_init(&mm->context.id_lock); } static void flush_context(void) @@ -58,7 +58,7 @@ static void set_mm_context(struct mm_struct *mm, unsigned int asid) * the broadcast. This function is also called via IPI so the * mm->context.id_lock has to be IRQ-safe. */ - spin_lock_irqsave(&mm->context.id_lock, flags); + raw_spin_lock_irqsave(&mm->context.id_lock, flags); if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { /* * Old version of ASID found. Set the new one and @@ -67,7 +67,7 @@ static void set_mm_context(struct mm_struct *mm, unsigned int asid) mm->context.id = asid; cpumask_clear(mm_cpumask(mm)); } - spin_unlock_irqrestore(&mm->context.id_lock, flags); + raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); /* * Set the mm_cpumask(mm) bit for the current CPU. 
@@ -117,7 +117,7 @@ void __new_context(struct mm_struct *mm) { unsigned int asid; - spin_lock(&cpu_asid_lock); + raw_spin_lock(&cpu_asid_lock); #ifdef CONFIG_SMP /* * Check the ASID again, in case the change was broadcast from @@ -125,7 +125,7 @@ void __new_context(struct mm_struct *mm) */ if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - spin_unlock(&cpu_asid_lock); + raw_spin_unlock(&cpu_asid_lock); return; } #endif @@ -153,5 +153,5 @@ void __new_context(struct mm_struct *mm) } set_mm_context(mm, asid); - spin_unlock(&cpu_asid_lock); + raw_spin_unlock(&cpu_asid_lock); } diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index b8061519ce7..7d0a8c23034 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -30,7 +30,7 @@ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_highpage @@ -76,14 +76,14 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from, if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + raw_spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); flush_tlb_kernel_page(0xffff8000); mc_copy_user_page((void *)0xffff8000, kto); - spin_unlock(&minicache_lock); + raw_spin_unlock(&minicache_lock); kunmap_atomic(kto, KM_USER1); } diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 63cca009713..3d9a1552cef 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -27,7 +27,7 @@ #define from_address (0xffff8000) #define to_address (0xffffc000) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. 
No aliasing to deal with so we can just @@ -88,7 +88,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, * Now copy the page using the same cache colour as the * pages ultimate destination. */ - spin_lock(&v6_lock); + raw_spin_lock(&v6_lock); set_pte_ext(TOP_PTE(from_address) + offset, pfn_pte(page_to_pfn(from), PAGE_KERNEL), 0); set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(to), PAGE_KERNEL), 0); @@ -101,7 +101,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, copy_page((void *)kto, (void *)kfrom); - spin_unlock(&v6_lock); + raw_spin_unlock(&v6_lock); } /* @@ -121,13 +121,13 @@ static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vad * Now clear the page using the same cache colour as * the pages ultimate destination. */ - spin_lock(&v6_lock); + raw_spin_lock(&v6_lock); set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(page), PAGE_KERNEL), 0); flush_tlb_kernel_page(to); clear_page((void *)to); - spin_unlock(&v6_lock); + raw_spin_unlock(&v6_lock); } struct cpu_user_fns v6_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 649bbcd325b..610c24ced31 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -32,7 +32,7 @@ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_highpage @@ -98,14 +98,14 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from, if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + raw_spin_lock(&minicache_lock); set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); flush_tlb_kernel_page(COPYPAGE_MINICACHE); mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); - spin_unlock(&minicache_lock); + 
raw_spin_unlock(&minicache_lock); kunmap_atomic(kto, KM_USER1); } diff --git a/arch/arm/plat-s5p/Kconfig b/arch/arm/plat-s5p/Kconfig index 9843c954c04..9a197e55f66 100644 --- a/arch/arm/plat-s5p/Kconfig +++ b/arch/arm/plat-s5p/Kconfig @@ -22,7 +22,6 @@ config PLAT_S5P select PLAT_SAMSUNG select SAMSUNG_CLKSRC select SAMSUNG_IRQ_VIC_TIMER - select SAMSUNG_IRQ_UART help Base platform code for Samsung's S5P series SoC. diff --git a/arch/arm/plat-s5p/dev-uart.c b/arch/arm/plat-s5p/dev-uart.c index afaf87fdb93..c9308db3618 100644 --- a/arch/arm/plat-s5p/dev-uart.c +++ b/arch/arm/plat-s5p/dev-uart.c @@ -32,20 +32,10 @@ static struct resource s5p_uart0_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S5P_UART_RX0, - .end = IRQ_S5P_UART_RX0, + .start = IRQ_UART0, + .end = IRQ_UART0, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = IRQ_S5P_UART_TX0, - .end = IRQ_S5P_UART_TX0, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = IRQ_S5P_UART_ERR0, - .end = IRQ_S5P_UART_ERR0, - .flags = IORESOURCE_IRQ, - } }; static struct resource s5p_uart1_resource[] = { @@ -55,18 +45,8 @@ static struct resource s5p_uart1_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S5P_UART_RX1, - .end = IRQ_S5P_UART_RX1, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S5P_UART_TX1, - .end = IRQ_S5P_UART_TX1, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = IRQ_S5P_UART_ERR1, - .end = IRQ_S5P_UART_ERR1, + .start = IRQ_UART1, + .end = IRQ_UART1, .flags = IORESOURCE_IRQ, }, }; @@ -78,18 +58,8 @@ static struct resource s5p_uart2_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S5P_UART_RX2, - .end = IRQ_S5P_UART_RX2, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S5P_UART_TX2, - .end = IRQ_S5P_UART_TX2, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = IRQ_S5P_UART_ERR2, - .end = IRQ_S5P_UART_ERR2, + .start = IRQ_UART2, + .end = IRQ_UART2, .flags = IORESOURCE_IRQ, }, }; @@ -102,18 +72,8 @@ static struct resource 
s5p_uart3_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S5P_UART_RX3, - .end = IRQ_S5P_UART_RX3, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S5P_UART_TX3, - .end = IRQ_S5P_UART_TX3, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = IRQ_S5P_UART_ERR3, - .end = IRQ_S5P_UART_ERR3, + .start = IRQ_UART3, + .end = IRQ_UART3, .flags = IORESOURCE_IRQ, }, #endif @@ -127,18 +87,8 @@ static struct resource s5p_uart4_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S5P_UART_RX4, - .end = IRQ_S5P_UART_RX4, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S5P_UART_TX4, - .end = IRQ_S5P_UART_TX4, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = IRQ_S5P_UART_ERR4, - .end = IRQ_S5P_UART_ERR4, + .start = IRQ_UART4, + .end = IRQ_UART4, .flags = IORESOURCE_IRQ, }, #endif @@ -152,18 +102,8 @@ static struct resource s5p_uart5_resource[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = IRQ_S5P_UART_RX5, - .end = IRQ_S5P_UART_RX5, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = IRQ_S5P_UART_TX5, - .end = IRQ_S5P_UART_TX5, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = IRQ_S5P_UART_ERR5, - .end = IRQ_S5P_UART_ERR5, + .start = IRQ_UART5, + .end = IRQ_UART5, .flags = IORESOURCE_IRQ, }, #endif diff --git a/arch/arm/plat-s5p/include/plat/irqs.h b/arch/arm/plat-s5p/include/plat/irqs.h index ba9121c60a2..144dbfc6506 100644 --- a/arch/arm/plat-s5p/include/plat/irqs.h +++ b/arch/arm/plat-s5p/include/plat/irqs.h @@ -37,41 +37,6 @@ #define IRQ_VIC1_BASE S5P_VIC1_BASE #define IRQ_VIC2_BASE S5P_VIC2_BASE -/* UART interrupts, each UART has 4 intterupts per channel so - * use the space between the ISA and S3C main interrupts. Note, these - * are not in the same order as the S3C24XX series! 
*/ - -#define IRQ_S5P_UART_BASE0 (16) -#define IRQ_S5P_UART_BASE1 (20) -#define IRQ_S5P_UART_BASE2 (24) -#define IRQ_S5P_UART_BASE3 (28) - -#define UART_IRQ_RXD (0) -#define UART_IRQ_ERR (1) -#define UART_IRQ_TXD (2) - -#define IRQ_S5P_UART_RX0 (IRQ_S5P_UART_BASE0 + UART_IRQ_RXD) -#define IRQ_S5P_UART_TX0 (IRQ_S5P_UART_BASE0 + UART_IRQ_TXD) -#define IRQ_S5P_UART_ERR0 (IRQ_S5P_UART_BASE0 + UART_IRQ_ERR) - -#define IRQ_S5P_UART_RX1 (IRQ_S5P_UART_BASE1 + UART_IRQ_RXD) -#define IRQ_S5P_UART_TX1 (IRQ_S5P_UART_BASE1 + UART_IRQ_TXD) -#define IRQ_S5P_UART_ERR1 (IRQ_S5P_UART_BASE1 + UART_IRQ_ERR) - -#define IRQ_S5P_UART_RX2 (IRQ_S5P_UART_BASE2 + UART_IRQ_RXD) -#define IRQ_S5P_UART_TX2 (IRQ_S5P_UART_BASE2 + UART_IRQ_TXD) -#define IRQ_S5P_UART_ERR2 (IRQ_S5P_UART_BASE2 + UART_IRQ_ERR) - -#define IRQ_S5P_UART_RX3 (IRQ_S5P_UART_BASE3 + UART_IRQ_RXD) -#define IRQ_S5P_UART_TX3 (IRQ_S5P_UART_BASE3 + UART_IRQ_TXD) -#define IRQ_S5P_UART_ERR3 (IRQ_S5P_UART_BASE3 + UART_IRQ_ERR) - -/* S3C compatibilty defines */ -#define IRQ_S3CUART_RX0 IRQ_S5P_UART_RX0 -#define IRQ_S3CUART_RX1 IRQ_S5P_UART_RX1 -#define IRQ_S3CUART_RX2 IRQ_S5P_UART_RX2 -#define IRQ_S3CUART_RX3 IRQ_S5P_UART_RX3 - /* VIC based IRQs */ #define S5P_IRQ_VIC0(x) (S5P_VIC0_BASE + (x)) diff --git a/arch/arm/plat-s5p/irq.c b/arch/arm/plat-s5p/irq.c index a97c08957f4..afdaa1082b9 100644 --- a/arch/arm/plat-s5p/irq.c +++ b/arch/arm/plat-s5p/irq.c @@ -17,42 +17,10 @@ #include <asm/hardware/vic.h> -#include <linux/serial_core.h> #include <mach/map.h> #include <plat/regs-timer.h> -#include <plat/regs-serial.h> #include <plat/cpu.h> #include <plat/irq-vic-timer.h> -#include <plat/irq-uart.h> - -/* - * Note, we make use of the fact that the parent IRQs, IRQ_UART[0..3] - * are consecutive when looking up the interrupt in the demux routines. 
- */ -static struct s3c_uart_irq uart_irqs[] = { - [0] = { - .regs = S5P_VA_UART0, - .base_irq = IRQ_S5P_UART_BASE0, - .parent_irq = IRQ_UART0, - }, - [1] = { - .regs = S5P_VA_UART1, - .base_irq = IRQ_S5P_UART_BASE1, - .parent_irq = IRQ_UART1, - }, - [2] = { - .regs = S5P_VA_UART2, - .base_irq = IRQ_S5P_UART_BASE2, - .parent_irq = IRQ_UART2, - }, -#if CONFIG_SERIAL_SAMSUNG_UARTS > 3 - [3] = { - .regs = S5P_VA_UART3, - .base_irq = IRQ_S5P_UART_BASE3, - .parent_irq = IRQ_UART3, - }, -#endif -}; void __init s5p_init_irq(u32 *vic, u32 num_vic) { @@ -65,6 +33,4 @@ void __init s5p_init_irq(u32 *vic, u32 num_vic) #endif s3c_init_vic_timer_irq(5, IRQ_TIMER0); - - s3c_init_uart_irqs(uart_irqs, ARRAY_SIZE(uart_irqs)); } diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index b3e10659e4b..dffa37bc4a0 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -65,11 +65,6 @@ config SAMSUNG_IRQ_VIC_TIMER help Internal configuration to build the VIC timer interrupt code. -config SAMSUNG_IRQ_UART - bool - help - Internal configuration to build the IRQ UART demux code. 
- # options for gpio configuration support config SAMSUNG_GPIOLIB_4BIT diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile index 853764ba8cc..1105922342f 100644 --- a/arch/arm/plat-samsung/Makefile +++ b/arch/arm/plat-samsung/Makefile @@ -21,7 +21,6 @@ obj-y += dev-asocdma.o obj-$(CONFIG_SAMSUNG_CLKSRC) += clock-clksrc.o -obj-$(CONFIG_SAMSUNG_IRQ_UART) += irq-uart.o obj-$(CONFIG_SAMSUNG_IRQ_VIC_TIMER) += irq-vic-timer.o # ADC diff --git a/arch/arm/plat-samsung/include/plat/regs-serial.h b/arch/arm/plat-samsung/include/plat/regs-serial.h index bac36fa3bec..72073484702 100644 --- a/arch/arm/plat-samsung/include/plat/regs-serial.h +++ b/arch/arm/plat-samsung/include/plat/regs-serial.h @@ -186,6 +186,11 @@ #define S3C64XX_UINTSP 0x34 #define S3C64XX_UINTM 0x38 +#define S3C64XX_UINTM_RXD (0) +#define S3C64XX_UINTM_TXD (2) +#define S3C64XX_UINTM_RXD_MSK (1 << S3C64XX_UINTM_RXD) +#define S3C64XX_UINTM_TXD_MSK (1 << S3C64XX_UINTM_TXD) + /* Following are specific to S5PV210 */ #define S5PV210_UCON_CLKMASK (1<<10) #define S5PV210_UCON_PCLK (0<<10) diff --git a/arch/arm/plat-samsung/irq-uart.c b/arch/arm/plat-samsung/irq-uart.c deleted file mode 100644 index 3014c7226bd..00000000000 --- a/arch/arm/plat-samsung/irq-uart.c +++ /dev/null @@ -1,96 +0,0 @@ -/* arch/arm/plat-samsung/irq-uart.c - * originally part of arch/arm/plat-s3c64xx/irq.c - * - * Copyright 2008 Openmoko, Inc. - * Copyright 2008 Simtec Electronics - * Ben Dooks <ben@simtec.co.uk> - * http://armlinux.simtec.co.uk/ - * - * Samsung- UART Interrupt handling - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/serial_core.h> -#include <linux/irq.h> -#include <linux/io.h> - -#include <asm/mach/irq.h> - -#include <mach/map.h> -#include <plat/irq-uart.h> -#include <plat/regs-serial.h> -#include <plat/cpu.h> - -/* Note, we make use of the fact that the parent IRQs, IRQ_UART[0..3] - * are consecutive when looking up the interrupt in the demux routines. - */ -static void s3c_irq_demux_uart(unsigned int irq, struct irq_desc *desc) -{ - struct s3c_uart_irq *uirq = desc->irq_data.handler_data; - struct irq_chip *chip = irq_get_chip(irq); - u32 pend = __raw_readl(uirq->regs + S3C64XX_UINTP); - int base = uirq->base_irq; - - chained_irq_enter(chip, desc); - - if (pend & (1 << 0)) - generic_handle_irq(base); - if (pend & (1 << 1)) - generic_handle_irq(base + 1); - if (pend & (1 << 2)) - generic_handle_irq(base + 2); - if (pend & (1 << 3)) - generic_handle_irq(base + 3); - - chained_irq_exit(chip, desc); -} - -static void __init s3c_init_uart_irq(struct s3c_uart_irq *uirq) -{ - void __iomem *reg_base = uirq->regs; - struct irq_chip_generic *gc; - struct irq_chip_type *ct; - - /* mask all interrupts at the start. 
*/ - __raw_writel(0xf, reg_base + S3C64XX_UINTM); - - gc = irq_alloc_generic_chip("s3c-uart", 1, uirq->base_irq, reg_base, - handle_level_irq); - - if (!gc) { - pr_err("%s: irq_alloc_generic_chip for IRQ %u failed\n", - __func__, uirq->base_irq); - return; - } - - ct = gc->chip_types; - ct->chip.irq_ack = irq_gc_ack_set_bit; - ct->chip.irq_mask = irq_gc_mask_set_bit; - ct->chip.irq_unmask = irq_gc_mask_clr_bit; - ct->regs.ack = S3C64XX_UINTP; - ct->regs.mask = S3C64XX_UINTM; - irq_setup_generic_chip(gc, IRQ_MSK(4), IRQ_GC_INIT_MASK_CACHE, - IRQ_NOREQUEST | IRQ_NOPROBE, 0); - - irq_set_handler_data(uirq->parent_irq, uirq); - irq_set_chained_handler(uirq->parent_irq, s3c_irq_demux_uart); -} - -/** - * s3c_init_uart_irqs() - initialise UART IRQs and the necessary demuxing - * @irq: The interrupt data for registering - * @nr_irqs: The number of interrupt descriptions in @irq. - * - * Register the UART interrupts specified by @irq including the demuxing - * routines. This supports the S3C6400 and newer style of devices. 
- */ -void __init s3c_init_uart_irqs(struct s3c_uart_irq *irq, unsigned int nr_irqs) -{ - for (; nr_irqs > 0; nr_irqs--, irq++) - s3c_init_uart_irq(irq); -} diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 7ed7714573c..d1f377f5d3b 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -195,7 +195,7 @@ config UNIX98_PTYS source "drivers/char/pcmcia/Kconfig" -source "drivers/serial/Kconfig" +source "drivers/tty/serial/Kconfig" source "drivers/i2c/Kconfig" diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index 0e5cd1405e0..43ab1cd097a 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -234,4 +234,4 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y CONFIG_MISC_DEVICES=y -CONFIG_DMAR=y +CONFIG_INTEL_IOMMU=y diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile index 2f7caddf093..ae16ec4f630 100644 --- a/arch/ia64/dig/Makefile +++ b/arch/ia64/dig/Makefile @@ -6,7 +6,7 @@ # obj-y := setup.o -ifeq ($(CONFIG_DMAR), y) +ifeq ($(CONFIG_INTEL_IOMMU), y) obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o else obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index d66d446b127..d05e78f6db9 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -10,7 +10,7 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 745e095fe82..105c93b00b1 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -7,12 +7,14 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); +#ifdef CONFIG_INTEL_IOMMU extern int force_iommu, no_iommu; -extern int iommu_detected; -#ifdef CONFIG_DMAR extern int iommu_pass_through; +extern int 
iommu_detected; #else #define iommu_pass_through (0) +#define no_iommu (1) +#define iommu_detected (0) #endif extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 73b5f785e70..127dd7be346 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -139,7 +139,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14); } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU extern void pci_iommu_alloc(void); #endif #endif /* _ASM_IA64_PCI_H */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 395c2f216dd..d959c84904b 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper endif -obj-$(CONFIG_DMAR) += pci-dma.o +obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_BINFMT_ELF) += elfcore.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 3be485a300b..bfb4d01e0e5 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -88,7 +88,7 @@ acpi_get_sysname(void) struct acpi_table_rsdp *rsdp; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU u64 i, nentries; #endif @@ -125,7 +125,7 @@ acpi_get_sysname(void) return "xen"; } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU /* Look for Intel IOMMU */ nentries = (hdr->length - sizeof(*hdr)) / sizeof(xsdt->table_offset_entry[0]); diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 009df5434a7..94e0db72d4a 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -131,7 +131,7 @@ void arch_teardown_msi_irq(unsigned int irq) return ia64_teardown_msi_irq(irq); } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU 
#ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -210,5 +210,5 @@ int arch_setup_dmar_msi(unsigned int irq) "edge"); return 0; } -#endif /* CONFIG_DMAR */ +#endif /* CONFIG_INTEL_IOMMU */ diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index f6b1ff0aea7..c16162c7086 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -14,7 +14,7 @@ #include <asm/system.h> -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU #include <linux/kernel.h> diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index b92b9445255..6c4e9aaa70c 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -10,6 +10,7 @@ config M32R select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW + select GENERIC_ATOMIC64 config SBUS bool diff --git a/arch/mips/pmc-sierra/msp71xx/msp_serial.c b/arch/mips/pmc-sierra/msp71xx/msp_serial.c index f7261628d8a..a1c7c7da233 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_serial.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_serial.c @@ -27,6 +27,7 @@ #include <linux/serial.h> #include <linux/serial_core.h> #include <linux/serial_reg.h> +#include <linux/slab.h> #include <asm/bootinfo.h> #include <asm/io.h> @@ -38,6 +39,55 @@ #include <msp_int.h> #include <msp_regs.h> +struct msp_uart_data { + int last_lcr; +}; + +static void msp_serial_out(struct uart_port *p, int offset, int value) +{ + struct msp_uart_data *d = p->private_data; + + if (offset == UART_LCR) + d->last_lcr = value; + + offset <<= p->regshift; + writeb(value, p->membase + offset); +} + +static unsigned int msp_serial_in(struct uart_port *p, int offset) +{ + offset <<= p->regshift; + + return readb(p->membase + offset); +} + +static int msp_serial_handle_irq(struct uart_port *p) +{ + struct msp_uart_data *d = p->private_data; + unsigned int iir = readb(p->membase + (UART_IIR << p->regshift)); + + if (serial8250_handle_irq(p, iir)) { + return 1; + } else if ((iir & UART_IIR_BUSY) == 
UART_IIR_BUSY) { + /* + * The DesignWare APB UART has an Busy Detect (0x07) interrupt + * meaning an LCR write attempt occurred while the UART was + * busy. The interrupt must be cleared by reading the UART + * status register (USR) and the LCR re-written. + * + * Note: MSP reserves 0x20 bytes of address space for the UART + * and the USR is mapped in a separate block at an offset of + * 0xc0 from the start of the UART. + */ + (void)readb(p->membase + 0xc0); + writeb(d->last_lcr, p->membase + (UART_LCR << p->regshift)); + + return 1; + } + + return 0; +} + void __init msp_serial_setup(void) { char *s; @@ -59,13 +109,22 @@ void __init msp_serial_setup(void) up.irq = MSP_INT_UART0; up.uartclk = uartclk; up.regshift = 2; - up.iotype = UPIO_DWAPB; /* UPIO_MEM like */ + up.iotype = UPIO_MEM; up.flags = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST; up.type = PORT_16550A; up.line = 0; - up.private_data = (void*)UART0_STATUS_REG; - if (early_serial_setup(&up)) - printk(KERN_ERR "Early serial init of port 0 failed\n"); + up.serial_out = msp_serial_out; + up.serial_in = msp_serial_in; + up.handle_irq = msp_serial_handle_irq; + up.private_data = kzalloc(sizeof(struct msp_uart_data), GFP_KERNEL); + if (!up.private_data) { + pr_err("failed to allocate uart private data\n"); + return; + } + if (early_serial_setup(&up)) { + kfree(up.private_data); + pr_err("Early serial init of port 0 failed\n"); + } /* Initialize the second serial port, if one exists */ switch (mips_machtype) { @@ -88,6 +147,8 @@ void __init msp_serial_setup(void) up.irq = MSP_INT_UART1; up.line = 1; up.private_data = (void*)UART1_STATUS_REG; - if (early_serial_setup(&up)) - printk(KERN_ERR "Early serial init of port 1 failed\n"); + if (early_serial_setup(&up)) { + kfree(up.private_data); + pr_err("Early serial init of port 1 failed\n"); + } } diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index f093b3a8a4a..438db84a1f7 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -47,9 +47,6 @@ config 
GENERIC_CMOS_UPDATE config GENERIC_HWEIGHT def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_CLOCKEVENTS def_bool y diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 93e05d1b34b..5354ae91bdd 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -54,6 +54,7 @@ extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); extern void __init udbg_init_wsp(void); +extern void __init udbg_init_ehv_bc(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UDBG_H */ diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index faa82c1f3f6..b4607a91d1f 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -67,6 +67,8 @@ void __init udbg_early_init(void) udbg_init_usbgecko(); #elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) udbg_init_wsp(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC) + udbg_init_ehv_bc(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 984cd202915..3330feca750 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -47,7 +47,7 @@ struct uic { int index; int dcrbase; - spinlock_t lock; + raw_spinlock_t lock; /* The remapper for this UIC */ struct irq_host *irqhost; @@ -61,14 +61,14 @@ static void uic_unmask_irq(struct irq_data *d) u32 er, sr; sr = 1 << (31-src); - spin_lock_irqsave(&uic->lock, flags); + raw_spin_lock_irqsave(&uic->lock, flags); /* ack level-triggered interrupts here */ if (irqd_is_level_type(d)) mtdcr(uic->dcrbase + UIC_SR, sr); er = mfdcr(uic->dcrbase + UIC_ER); er |= sr; mtdcr(uic->dcrbase + UIC_ER, er); - spin_unlock_irqrestore(&uic->lock, flags); + raw_spin_unlock_irqrestore(&uic->lock, flags); } static void uic_mask_irq(struct irq_data *d) @@ -78,11 +78,11 @@ static void uic_mask_irq(struct irq_data *d) unsigned long flags; u32 er; - spin_lock_irqsave(&uic->lock, flags); + 
raw_spin_lock_irqsave(&uic->lock, flags); er = mfdcr(uic->dcrbase + UIC_ER); er &= ~(1 << (31 - src)); mtdcr(uic->dcrbase + UIC_ER, er); - spin_unlock_irqrestore(&uic->lock, flags); + raw_spin_unlock_irqrestore(&uic->lock, flags); } static void uic_ack_irq(struct irq_data *d) @@ -91,9 +91,9 @@ static void uic_ack_irq(struct irq_data *d) unsigned int src = irqd_to_hwirq(d); unsigned long flags; - spin_lock_irqsave(&uic->lock, flags); + raw_spin_lock_irqsave(&uic->lock, flags); mtdcr(uic->dcrbase + UIC_SR, 1 << (31-src)); - spin_unlock_irqrestore(&uic->lock, flags); + raw_spin_unlock_irqrestore(&uic->lock, flags); } static void uic_mask_ack_irq(struct irq_data *d) @@ -104,7 +104,7 @@ static void uic_mask_ack_irq(struct irq_data *d) u32 er, sr; sr = 1 << (31-src); - spin_lock_irqsave(&uic->lock, flags); + raw_spin_lock_irqsave(&uic->lock, flags); er = mfdcr(uic->dcrbase + UIC_ER); er &= ~sr; mtdcr(uic->dcrbase + UIC_ER, er); @@ -118,7 +118,7 @@ static void uic_mask_ack_irq(struct irq_data *d) */ if (!irqd_is_level_type(d)) mtdcr(uic->dcrbase + UIC_SR, sr); - spin_unlock_irqrestore(&uic->lock, flags); + raw_spin_unlock_irqrestore(&uic->lock, flags); } static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) @@ -152,7 +152,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) mask = ~(1 << (31 - src)); - spin_lock_irqsave(&uic->lock, flags); + raw_spin_lock_irqsave(&uic->lock, flags); tr = mfdcr(uic->dcrbase + UIC_TR); pr = mfdcr(uic->dcrbase + UIC_PR); tr = (tr & mask) | (trigger << (31-src)); @@ -161,7 +161,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) mtdcr(uic->dcrbase + UIC_PR, pr); mtdcr(uic->dcrbase + UIC_TR, tr); - spin_unlock_irqrestore(&uic->lock, flags); + raw_spin_unlock_irqrestore(&uic->lock, flags); return 0; } @@ -254,7 +254,7 @@ static struct uic * __init uic_init_one(struct device_node *node) if (! uic) return NULL; /* FIXME: panic? 
*/ - spin_lock_init(&uic->lock); + raw_spin_lock_init(&uic->lock); indexp = of_get_property(node, "cell-index", &len); if (!indexp || (len != sizeof(u32))) { printk(KERN_ERR "uic: Device node %s has missing or invalid " diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dff933065ab..8d65bd0383f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -109,10 +109,14 @@ static void fixup_clock_comparator(unsigned long long delta) set_clock_comparator(S390_lowcore.clock_comparator); } -static int s390_next_event(unsigned long delta, +static int s390_next_ktime(ktime_t expires, struct clock_event_device *evt) { - S390_lowcore.clock_comparator = get_clock() + delta; + u64 nsecs; + + nsecs = ktime_to_ns(ktime_sub(expires, ktime_get_monotonic_offset())); + do_div(nsecs, 125); + S390_lowcore.clock_comparator = TOD_UNIX_EPOCH + (nsecs << 9); set_clock_comparator(S390_lowcore.clock_comparator); return 0; } @@ -137,14 +141,15 @@ void init_cpu_timer(void) cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); cd->name = "comparator"; - cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME; cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); - cd->set_next_event = s390_next_event; + cd->set_next_ktime = s390_next_ktime; cd->set_mode = s390_set_mode; clockevents_register_device(cd); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index b30f71ac0d0..70a0de46cd1 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -46,9 +46,6 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config SYS_SUPPORTS_HUGETLBFS def_bool y -config GENERIC_TIME - def_bool y - config GENERIC_CLOCKEVENTS def_bool y diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 2ad73fb707b..dafdbbae112 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -11,7 +11,6 @@ 
CONFIG_HAVE_ARCH_ALLOC_REMAP=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_SYS_SUPPORTS_HUGETLBFS=y -CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_DEFAULT_MIGRATION_COST=10000000 diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index f58dc362b94..6f05f969b56 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -11,7 +11,6 @@ CONFIG_HAVE_ARCH_ALLOC_REMAP=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_SYS_SUPPORTS_HUGETLBFS=y -CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_DEFAULT_MIGRATION_COST=10000000 diff --git a/arch/um/defconfig b/arch/um/defconfig index 9f7634f08cf..761f5e1a657 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -13,7 +13,6 @@ CONFIG_LOCKDEP_SUPPORT=y # CONFIG_STACKTRACE_SUPPORT is not set CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_IRQ_RELEASE_METHOD=y CONFIG_HZ=100 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9a4a267a8a5..f49767716c7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -64,10 +64,12 @@ config X86 select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS select HAVE_SPARSE_IRQ + select SPARSE_IRQ select GENERIC_FIND_FIRST_BIT select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if (X86_64 && NET) @@ -130,7 +132,7 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || DMAR || DMA_API_DEBUG) + def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) config NEED_SG_DMA_LENGTH def_bool y @@ -220,7 +222,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config HAVE_INTEL_TXT def_bool y - depends on EXPERIMENTAL && DMAR && ACPI + depends on EXPERIMENTAL && INTEL_IOMMU 
&& ACPI config X86_32_SMP def_bool y @@ -287,7 +289,7 @@ config SMP config X86_X2APIC bool "Support x2apic" - depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP + depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP ---help--- This enables x2apic support on CPUs that have this feature. diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 22a0dc8e51d..058a35b8286 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -67,8 +67,8 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_PCI_MMCONFIG=y -CONFIG_DMAR=y -# CONFIG_DMAR_DEFAULT_ON is not set +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_PCIEPORTBUS=y CONFIG_PCCARD=y CONFIG_YENTA=y diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 7b3ca8324b6..9b7273cb219 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -495,7 +495,7 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } -extern struct apic *generic_bigsmp_probe(void); +extern void generic_bigsmp_probe(void); #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 029f230ab63..63a2a03d7d5 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) +#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 32609919931..f6f15986df6 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -27,6 +27,7 @@ #define CFI_REMEMBER_STATE .cfi_remember_state #define CFI_RESTORE_STATE .cfi_restore_state #define CFI_UNDEFINED .cfi_undefined +#define CFI_ESCAPE 
.cfi_escape #ifdef CONFIG_AS_CFI_SIGNAL_FRAME #define CFI_SIGNAL_FRAME .cfi_signal_frame @@ -68,6 +69,7 @@ #define CFI_REMEMBER_STATE cfi_ignore #define CFI_RESTORE_STATE cfi_ignore #define CFI_UNDEFINED cfi_ignore +#define CFI_ESCAPE cfi_ignore #define CFI_SIGNAL_FRAME cfi_ignore #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 09199052060..eb92a6ed2be 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -119,7 +119,7 @@ struct irq_cfg { cpumask_var_t old_domain; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP struct irq_2_iommu irq_2_iommu; #endif }; diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index 5df477ac3af..b80420bcd09 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h @@ -189,5 +189,6 @@ #define HV_STATUS_INVALID_HYPERCALL_CODE 2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 #define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 #endif diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 1c23360fb2d..47d99934580 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -3,7 +3,8 @@ #define IRTE_DEST(dest) ((x2apic_mode) ? 
dest : dest << 8) -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP +static void irq_remap_modify_chip_defaults(struct irq_chip *chip); static inline void prepare_irte(struct irte *irte, int vector, unsigned int dest) { @@ -36,6 +37,9 @@ static inline bool irq_remapped(struct irq_cfg *cfg) { return false; } +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} #endif #endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 7e50f06393a..4b4448761e8 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -160,19 +160,11 @@ static inline int invalid_vm86_irq(int irq) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) #ifdef CONFIG_X86_IO_APIC -# ifdef CONFIG_SPARSE_IRQ -# define CPU_VECTOR_LIMIT (64 * NR_CPUS) -# define NR_IRQS \ +# define CPU_VECTOR_LIMIT (64 * NR_CPUS) +# define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -# else -# define CPU_VECTOR_LIMIT (32 * NR_CPUS) -# define NR_IRQS \ - (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ - (NR_VECTORS + CPU_VECTOR_LIMIT) : \ - (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -# endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS NR_IRQS_LEGACY #endif diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 4886a68f267..fd3f9f18cf3 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -22,27 +22,26 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif -/* - * Define some priorities for the nmi notifier call chain. - * - * Create a local nmi bit that has a higher priority than - * external nmis, because the local ones are more frequent. - * - * Also setup some default high/normal/low settings for - * subsystems to registers with. Using 4 bits to separate - * the priorities. 
This can go a lot higher if needed be. - */ - -#define NMI_LOCAL_SHIFT 16 /* randomly picked */ -#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) -#define NMI_HIGH_PRIOR (1ULL << 8) -#define NMI_NORMAL_PRIOR (1ULL << 4) -#define NMI_LOW_PRIOR (1ULL << 0) -#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) -#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) -#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) +#define NMI_FLAG_FIRST 1 + +enum { + NMI_LOCAL=0, + NMI_UNKNOWN, + NMI_MAX +}; + +#define NMI_DONE 0 +#define NMI_HANDLED 1 + +typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); + +int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, + const char *); + +void unregister_nmi_handler(unsigned int, const char *); void stop_nmi(void); void restart_nmi(void); +void local_touch_nmi(void); #endif /* _ASM_X86_NMI_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 094fb30817a..f61c62f7d5d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,6 +29,9 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) +#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) + #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) #define INTEL_ARCH_EVENT_MASK \ @@ -43,14 +46,17 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) +#define AMD64_NUM_COUNTERS 4 +#define AMD64_NUM_COUNTERS_F15H 6 +#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << 
(ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 /* * Intel "Architectural Performance Monitoring" CPUID @@ -110,6 +116,35 @@ union cpuid10_edx { */ #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) +/* + * IBS cpuid feature detection + */ + +#define IBS_CPUID_FEATURES 0x8000001b + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. + */ +#define IBS_CAPS_AVAIL (1U<<0) +#define IBS_CAPS_FETCHSAM (1U<<1) +#define IBS_CAPS_OPSAM (1U<<2) +#define IBS_CAPS_RDWROPCNT (1U<<3) +#define IBS_CAPS_OPCNT (1U<<4) +#define IBS_CAPS_BRNTRGT (1U<<5) +#define IBS_CAPS_OPCNTEXT (1U<<6) + +#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ + | IBS_CAPS_FETCHSAM \ + | IBS_CAPS_OPSAM) + +/* + * IBS APIC setup + */ +#define IBSCTL 0x1cc +#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) +#define IBSCTL_LVT_OFFSET_MASK 0x0F + /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) @@ -124,6 +159,8 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +extern u32 get_ibs_caps(void); + #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); @@ -159,7 +196,19 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); ); \ } +struct perf_guest_switch_msr { + unsigned msr; + u64 host, guest; +}; + +extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); #else +static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + *nr = 0; + return NULL; +} + static inline void perf_events_lapic_init(void) { } #endif diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 3250e3d605d..92f297069e8 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 
-typedef void (*nmi_shootdown_cb)(int, struct die_args*); +typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); #endif /* _ASM_X86_REBOOT_H */ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 20104057344..0a6ba337a2e 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -624,7 +624,6 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice) __SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_utimensat 280 __SYSCALL(__NR_utimensat, sys_utimensat) -#define __IGNORE_getcpu /* implemented as a vsyscall */ #define __NR_epoll_pwait 281 __SYSCALL(__NR_epoll_pwait, sys_epoll_pwait) #define __NR_signalfd 282 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 82f2912155a..8baca3c4871 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -19,7 +19,7 @@ endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o +obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 52fa56399a5..a2fd72e0ab3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1437,27 +1437,21 @@ void enable_x2apic(void) int __init enable_IR(void) { -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); - return 0; + return -1; } if (!x2apic_preenabled && skip_ioapic_setup) { pr_info("Skipped enabling intr-remap because of skipping " "io-apic setup\n"); - return 0; + return -1; } - if (enable_intr_remapping(x2apic_supported())) - return 0; - - pr_info("Enabled Interrupt-remapping\n"); - - return 1; - + return enable_intr_remapping(); #endif - return 0; + return -1; } void __init 
enable_IR_x2apic(void) @@ -1481,11 +1475,11 @@ void __init enable_IR_x2apic(void) mask_ioapic_entries(); if (dmar_table_init_ret) - ret = 0; + ret = -1; else ret = enable_IR(); - if (!ret) { + if (ret < 0) { /* IR is required if there is APIC ID > 255 even when running * under KVM */ @@ -1499,6 +1493,9 @@ void __init enable_IR_x2apic(void) x2apic_force_phys(); } + if (ret == IRQ_REMAP_XAPIC_MODE) + goto nox2apic; + x2apic_enabled = 1; if (x2apic_supported() && !x2apic_mode) { @@ -1508,19 +1505,21 @@ void __init enable_IR_x2apic(void) } nox2apic: - if (!ret) /* IR enabling failed */ + if (ret < 0) /* IR enabling failed */ restore_ioapic_entries(); legacy_pic->restore_mask(); local_irq_restore(flags); out: - if (x2apic_enabled) + if (x2apic_enabled || !x2apic_supported()) return; if (x2apic_preenabled) panic("x2apic: enabled by BIOS but kernel init failed."); - else if (cpu_has_x2apic) - pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); + else if (ret == IRQ_REMAP_XAPIC_MODE) + pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); + else if (ret < 0) + pr_info("x2apic not enabled, IRQ remapping init failed\n"); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index efd737e827f..521bead0113 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -255,12 +255,24 @@ static struct apic apic_bigsmp = { .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, }; -struct apic * __init generic_bigsmp_probe(void) +void __init generic_bigsmp_probe(void) { - if (probe_bigsmp()) - return &apic_bigsmp; + unsigned int cpu; - return NULL; + if (!probe_bigsmp()) + return; + + apic = &apic_bigsmp; + + for_each_possible_cpu(cpu) { + if (early_per_cpu(x86_cpu_to_logical_apicid, + cpu) == BAD_APICID) + continue; + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = + bigsmp_early_logical_apicid(cpu); + } + + pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); } 
apic_driver(apic_bigsmp); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index d5e57db0f7b..31cb9ae992b 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void) } static int __kprobes -arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - struct pt_regs *regs; int cpu; - switch (cmd) { - case DIE_NMI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { @@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, show_regs(regs); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + return NMI_DONE; } -static __read_mostly struct notifier_block backtrace_notifier = { - .notifier_call = arch_trigger_all_cpu_backtrace_handler, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - static int __init register_trigger_all_cpu_backtrace(void) { - register_die_notifier(&backtrace_notifier); + register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler, + 0, "arch_bt"); return 0; } early_initcall(register_trigger_all_cpu_backtrace); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8eb863e27ea..3c31fa98af6 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -92,21 +92,21 @@ static struct ioapic { DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } ioapics[MAX_IO_APICS]; -#define mpc_ioapic_ver(id) ioapics[id].mp_config.apicver +#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver -int mpc_ioapic_id(int id) +int mpc_ioapic_id(int ioapic_idx) { - return ioapics[id].mp_config.apicid; 
+ return ioapics[ioapic_idx].mp_config.apicid; } -unsigned int mpc_ioapic_addr(int id) +unsigned int mpc_ioapic_addr(int ioapic_idx) { - return ioapics[id].mp_config.apicaddr; + return ioapics[ioapic_idx].mp_config.apicaddr; } -struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id) +struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) { - return &ioapics[id].gsi_config; + return &ioapics[ioapic_idx].gsi_config; } int nr_ioapics; @@ -186,11 +186,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; -#else -static struct irq_cfg irq_cfgx[NR_IRQS]; -#endif int __init arch_early_irq_init(void) { @@ -234,7 +230,6 @@ int __init arch_early_irq_init(void) return 0; } -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { return irq_get_chip_data(irq); @@ -269,22 +264,6 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) kfree(cfg); } -#else - -struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irq < nr_irqs ? 
irq_cfgx + irq : NULL; -} - -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) -{ - return irq_cfgx + irq; -} - -static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { } - -#endif - static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) { int res = irq_alloc_desc_at(at, node); @@ -394,13 +373,21 @@ union entry_union { struct IO_APIC_route_entry entry; }; +static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) +{ + union entry_union eu; + + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + return eu.entry; +} + static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + eu.entry = __ioapic_read_entry(apic, pin); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return eu.entry; } @@ -529,18 +516,6 @@ static void io_apic_modify_irq(struct irq_cfg *cfg, __io_apic_modify_irq(entry, mask_and, mask_or, final); } -static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) -{ - __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) -{ - __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void io_apic_sync(struct irq_pin_list *entry) { /* @@ -585,6 +560,66 @@ static void unmask_ioapic_irq(struct irq_data *data) unmask_ioapic(data->chip_data); } +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. 
This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. + */ +static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) +{ + if (mpc_ioapic_ver(apic) >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (cfg && irq_remapped(cfg)) + io_apic_eoi(apic, pin); + else + io_apic_eoi(apic, vector); + } else { + struct IO_APIC_route_entry entry, entry1; + + entry = entry1 = __ioapic_read_entry(apic, pin); + + /* + * Mask the entry and change the trigger mode to edge. + */ + entry1.mask = 1; + entry1.trigger = IOAPIC_EDGE; + + __ioapic_write_entry(apic, pin, entry1); + + /* + * Restore the previous level triggered entry. + */ + __ioapic_write_entry(apic, pin, entry); + } +} + +static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + unsigned long flags; + + raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, cfg->irq_2_pin) + __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -593,10 +628,44 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) return; + + /* + * Make sure the entry is masked and re-read the contents to check + * if it is a level triggered pin and if the remote-IRR is set. 
+ */ + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + entry = ioapic_read_entry(apic, pin); + } + + if (entry.irr) { + unsigned long flags; + + /* + * Make sure the trigger mode is set to level. Explicit EOI + * doesn't clear the remote-IRR if the trigger mode is not + * set to level. + */ + if (!entry.trigger) { + entry.trigger = IOAPIC_LEVEL; + ioapic_write_entry(apic, pin, entry); + } + + raw_spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_pin(apic, pin, entry.vector, NULL); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + /* - * Disable it in the IO-APIC irq-routing table: + * Clear the rest of the bits in the IO-APIC RTE except for the mask + * bit. */ ioapic_mask_entry(apic, pin); + entry = ioapic_read_entry(apic, pin); + if (entry.irr) + printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", + mpc_ioapic_id(apic), pin); } static void clear_IO_APIC (void) @@ -712,13 +781,13 @@ int restore_ioapic_entries(void) /* * Find the IRQ entry number of a certain pin. 
*/ -static int find_irq_entry(int apic, int pin, int type) +static int find_irq_entry(int ioapic_idx, int pin, int type) { int i; for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].irqtype == type && - (mp_irqs[i].dstapic == mpc_ioapic_id(apic) || + (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || mp_irqs[i].dstapic == MP_APIC_ALL) && mp_irqs[i].dstirq == pin) return i; @@ -757,12 +826,13 @@ static int __init find_isa_irq_apic(int irq, int type) (mp_irqs[i].srcbusirq == irq)) break; } + if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic) - return apic; - } + int ioapic_idx; + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) + return ioapic_idx; } return -1; @@ -977,7 +1047,7 @@ static int pin_2_irq(int idx, int apic, int pin) int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, struct io_apic_irq_attr *irq_attr) { - int apic, i, best_guess = -1; + int ioapic_idx, i, best_guess = -1; apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", @@ -990,8 +1060,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; - for (apic = 0; apic < nr_ioapics; apic++) - if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic || + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || mp_irqs[i].dstapic == MP_APIC_ALL) break; @@ -999,13 +1069,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, !mp_irqs[i].irqtype && (bus == lbus) && (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); + int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); - if (!(apic || IO_APIC_IRQ(irq))) + if (!(ioapic_idx || IO_APIC_IRQ(irq))) continue; if (pin == (mp_irqs[i].srcbusirq & 3)) { - set_io_apic_irq_attr(irq_attr, apic, + 
set_io_apic_irq_attr(irq_attr, ioapic_idx, mp_irqs[i].dstirq, irq_trigger(i), irq_polarity(i)); @@ -1016,7 +1086,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, * best-guess fuzzy result for broken mptables. */ if (best_guess < 0) { - set_io_apic_irq_attr(irq_attr, apic, + set_io_apic_irq_attr(irq_attr, ioapic_idx, mp_irqs[i].dstirq, irq_trigger(i), irq_polarity(i)); @@ -1202,7 +1272,6 @@ void __setup_vector_irq(int cpu) } static struct irq_chip ioapic_chip; -static struct irq_chip ir_ioapic_chip; #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) @@ -1246,7 +1315,7 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, if (irq_remapped(cfg)) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - chip = &ir_ioapic_chip; + irq_remap_modify_chip_defaults(chip); fasteoi = trigger != 0; } @@ -1255,77 +1324,100 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi ? "fasteoi" : "edge"); } -static int setup_ioapic_entry(int apic_id, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector, int pin) + +static int setup_ir_ioapic_entry(int irq, + struct IR_IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) { - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); + int index; + struct irte irte; + int ioapic_id = mpc_ioapic_id(attr->ioapic); + struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; + if (!iommu) { + pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); + return -ENODEV; + } - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic_id); + index = alloc_irte(iommu, irq, 1); + if (index < 0) { + pr_warn("Failed to allocate IRTE for 
ioapic %d\n", ioapic_id); + return -ENOMEM; + } - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic_id); + prepare_irte(&irte, vector, destination); - prepare_irte(&irte, vector, destination); + /* Set source-id of interrupt request */ + set_ioapic_sid(&irte, ioapic_id); - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, apic_id); + modify_irte(irq, &irte); - modify_irte(irq, &irte); + apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " + "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " + "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " + "Avail:%X Vector:%02X Dest:%08X " + "SID:%04X SQ:%X SVT:%X)\n", + attr->ioapic, irte.present, irte.fpd, irte.dst_mode, + irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, + irte.avail, irte.vector, irte.dest_id, + irte.sid, irte.sq, irte.svt); + + memset(entry, 0, sizeof(*entry)); + + entry->index2 = (index >> 15) & 0x1; + entry->zero = 0; + entry->format = 1; + entry->index = (index & 0x7fff); + /* + * IO-APIC RTE will be configured with virtual vector. + * irq handler will do the explicit EOI to the io-apic. + */ + entry->vector = attr->ioapic_pin; + entry->mask = 0; /* enable IRQ */ + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. 
- */ - ir_entry->vector = pin; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " - "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " - "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " - "Avail:%X Vector:%02X Dest:%08X " - "SID:%04X SQ:%X SVT:%X)\n", - apic_id, irte.present, irte.fpd, irte.dst_mode, - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, - irte.avail, irte.vector, irte.dest_id, - irte.sid, irte.sq, irte.svt); - } else { - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = destination; - entry->vector = vector; - } + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (attr->trigger) + entry->mask = 1; - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; + return 0; +} - /* Mask level triggered irqs. +static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) +{ + if (intr_remapping_enabled) + return setup_ir_ioapic_entry(irq, + (struct IR_IO_APIC_route_entry *)entry, + destination, vector, attr); + + memset(entry, 0, sizeof(*entry)); + + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->dest = destination; + entry->vector = vector; + entry->mask = 0; /* enable IRQ */ + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; + + /* + * Mask level triggered irqs. * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 
*/ - if (trigger) + if (attr->trigger) entry->mask = 1; + return 0; } -static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, - struct irq_cfg *cfg, int trigger, int polarity) +static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, + struct io_apic_irq_attr *attr) { struct IO_APIC_route_entry entry; unsigned int dest; @@ -1348,49 +1440,48 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i Dest:%d)\n", - apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector, - irq, trigger, polarity, dest); - + attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, + cfg->vector, irq, attr->trigger, attr->polarity, dest); - if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry, - dest, trigger, polarity, cfg->vector, pin)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mpc_ioapic_id(apic_id), pin); + if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); + return; } - ioapic_register_intr(irq, cfg, trigger); + ioapic_register_intr(irq, cfg, attr->trigger); if (irq < legacy_pic->nr_legacy_irqs) legacy_pic->mask(irq); - ioapic_write_entry(apic_id, pin, entry); + ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); } -static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin) +static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin) { if (idx != -1) return false; apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", - mpc_ioapic_id(apic_id), pin); + mpc_ioapic_id(ioapic_idx), pin); return true; } -static void __init __io_apic_setup_irqs(unsigned int apic_id) +static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) { int idx, node = cpu_to_node(0); struct io_apic_irq_attr attr; 
unsigned int pin, irq; - for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) { - idx = find_irq_entry(apic_id, pin, mp_INT); - if (io_apic_pin_not_connected(idx, apic_id, pin)) + for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) { + idx = find_irq_entry(ioapic_idx, pin, mp_INT); + if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) continue; - irq = pin_2_irq(idx, apic_id, pin); + irq = pin_2_irq(idx, ioapic_idx, pin); - if ((apic_id > 0) && (irq > 16)) + if ((ioapic_idx > 0) && (irq > 16)) continue; /* @@ -1398,10 +1489,10 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id) * installed and if it returns 1: */ if (apic->multi_timer_check && - apic->multi_timer_check(apic_id, irq)) + apic->multi_timer_check(ioapic_idx, irq)) continue; - set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), + set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), irq_polarity(idx)); io_apic_setup_irq_pin(irq, node, &attr); @@ -1410,12 +1501,12 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id) static void __init setup_IO_APIC_irqs(void) { - unsigned int apic_id; + unsigned int ioapic_idx; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) - __io_apic_setup_irqs(apic_id); + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + __io_apic_setup_irqs(ioapic_idx); } /* @@ -1425,28 +1516,28 @@ static void __init setup_IO_APIC_irqs(void) */ void setup_IO_APIC_irq_extra(u32 gsi) { - int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); + int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0); struct io_apic_irq_attr attr; /* * Convert 'gsi' to 'ioapic.pin'. 
*/ - apic_id = mp_find_ioapic(gsi); - if (apic_id < 0) + ioapic_idx = mp_find_ioapic(gsi); + if (ioapic_idx < 0) return; - pin = mp_find_ioapic_pin(apic_id, gsi); - idx = find_irq_entry(apic_id, pin, mp_INT); + pin = mp_find_ioapic_pin(ioapic_idx, gsi); + idx = find_irq_entry(ioapic_idx, pin, mp_INT); if (idx == -1) return; - irq = pin_2_irq(idx, apic_id, pin); + irq = pin_2_irq(idx, ioapic_idx, pin); /* Only handle the non legacy irqs on secondary ioapics */ - if (apic_id == 0 || irq < NR_IRQS_LEGACY) + if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY) return; - set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), + set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), irq_polarity(idx)); io_apic_setup_irq_pin_once(irq, node, &attr); @@ -1455,8 +1546,8 @@ void setup_IO_APIC_irq_extra(u32 gsi) /* * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, - int vector) +static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, + unsigned int pin, int vector) { struct IO_APIC_route_entry entry; @@ -1487,45 +1578,29 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, /* * Add it to the IO-APIC irq-routing table: */ - ioapic_write_entry(apic_id, pin, entry); + ioapic_write_entry(ioapic_idx, pin, entry); } - -__apicdebuginit(void) print_IO_APIC(void) +__apicdebuginit(void) print_IO_APIC(int ioapic_idx) { - int apic, i; + int i; union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; - struct irq_cfg *cfg; - unsigned int irq; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mpc_ioapic_id(i), ioapics[i].nr_registers); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. 
- */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); + reg_00.raw = io_apic_read(ioapic_idx, 0); + reg_01.raw = io_apic_read(ioapic_idx, 1); if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); + reg_02.raw = io_apic_read(ioapic_idx, 2); if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + reg_03.raw = io_apic_read(ioapic_idx, 3); raw_spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic)); + printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -1575,7 +1650,7 @@ __apicdebuginit(void) print_IO_APIC(void) struct IO_APIC_route_entry entry; struct IR_IO_APIC_route_entry *ir_entry; - entry = ioapic_read_entry(apic, i); + entry = ioapic_read_entry(ioapic_idx, i); ir_entry = (struct IR_IO_APIC_route_entry *) &entry; printk(KERN_DEBUG " %02x %04X ", i, @@ -1596,7 +1671,7 @@ __apicdebuginit(void) print_IO_APIC(void) } else { struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, i); + entry = ioapic_read_entry(ioapic_idx, i); printk(KERN_DEBUG " %02x %02X ", i, entry.dest @@ -1614,7 +1689,28 @@ __apicdebuginit(void) print_IO_APIC(void) ); } } - } +} + +__apicdebuginit(void) print_IO_APICs(void) +{ + int ioapic_idx; + struct irq_cfg *cfg; + unsigned int irq; + + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mpc_ioapic_id(ioapic_idx), + ioapics[ioapic_idx].nr_registers); + + /* + * We are a bit 
conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + print_IO_APIC(ioapic_idx); printk(KERN_DEBUG "IRQ to pin mappings:\n"); for_each_active_irq(irq) { @@ -1633,8 +1729,6 @@ __apicdebuginit(void) print_IO_APIC(void) } printk(KERN_INFO ".................................... done.\n"); - - return; } __apicdebuginit(void) print_APIC_field(int base) @@ -1828,7 +1922,7 @@ __apicdebuginit(int) print_ICs(void) return 0; print_local_APICs(show_lapic); - print_IO_APIC(); + print_IO_APICs(); return 0; } @@ -1953,7 +2047,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int apic_id; + int ioapic_idx; int i; unsigned char old_id; unsigned long flags; @@ -1967,21 +2061,20 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { - + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { /* Read the register 0 value */ raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic_id, 0); + reg_00.raw = io_apic_read(ioapic_idx, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mpc_ioapic_id(apic_id); + old_id = mpc_ioapic_id(ioapic_idx); - if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) { + if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic_id, mpc_ioapic_id(apic_id)); + ioapic_idx, mpc_ioapic_id(ioapic_idx)); printk(KERN_ERR "... fixing up to %d. 
(tell your hw vendor)\n", reg_00.bits.ID); - ioapics[apic_id].mp_config.apicid = reg_00.bits.ID; + ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; } /* @@ -1990,9 +2083,9 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (apic->check_apicid_used(&phys_id_present_map, - mpc_ioapic_id(apic_id))) { + mpc_ioapic_id(ioapic_idx))) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic_id, mpc_ioapic_id(apic_id)); + ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2001,14 +2094,14 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - ioapics[apic_id].mp_config.apicid = i; + ioapics[ioapic_idx].mp_config.apicid = i; } else { physid_mask_t tmp; - apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id), + apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mpc_ioapic_id(apic_id)); + mpc_ioapic_id(ioapic_idx)); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2016,35 +2109,35 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mpc_ioapic_id(apic_id)) + if (old_id != mpc_ioapic_id(ioapic_idx)) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].dstapic == old_id) mp_irqs[i].dstapic - = mpc_ioapic_id(apic_id); + = mpc_ioapic_id(ioapic_idx); /* * Update the ID register according to the right value * from the MPC table if they are different. 
*/ - if (mpc_ioapic_id(apic_id) == reg_00.bits.ID) + if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) continue; apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mpc_ioapic_id(apic_id)); + mpc_ioapic_id(ioapic_idx)); - reg_00.bits.ID = mpc_ioapic_id(apic_id); + reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic_id, 0, reg_00.raw); + io_apic_write(ioapic_idx, 0, reg_00.raw); raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic_id, 0); + reg_00.raw = io_apic_read(ioapic_idx, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mpc_ioapic_id(apic_id)) + if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -2255,7 +2348,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP /* * Migrate the IO-APIC irq in the presence of intr-remapping. @@ -2267,6 +2360,9 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, * updated vector information), by using a virtual vector (io-apic pin number). * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. + * + * As the migration is a simple atomic update of IRTE, the same mechanism + * is used to migrate MSI irq's in the presence of interrupt-remapping. */ static int ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, @@ -2291,10 +2387,16 @@ ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, irte.dest_id = IRTE_DEST(dest); /* - * Modified the IRTE and flushes the Interrupt entry cache. + * Atomically updates the IRTE with the new destination, vector + * and flushes the interrupt entry cache. 
*/ modify_irte(irq, &irte); + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ if (cfg->move_in_progress) send_cleanup_vector(cfg); @@ -2407,48 +2509,6 @@ static void ack_apic_edge(struct irq_data *data) atomic_t irq_mis_count; -/* - * IO-APIC versions below 0x20 don't support EOI register. - * For the record, here is the information about various versions: - * 0Xh 82489DX - * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant - * 2Xh I/O(x)APIC which is PCI 2.2 Compliant - * 30h-FFh Reserved - * - * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic - * version as 0x2. This is an error with documentation and these ICH chips - * use io-apic's of version 0x20. - * - * For IO-APIC's with EOI register, we use that to do an explicit EOI. - * Otherwise, we simulate the EOI message manually by changing the trigger - * mode to edge and then back to level, with RTE being masked during this. -*/ -static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) { - if (mpc_ioapic_ver(entry->apic) >= 0x20) { - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. 
- */ - if (irq_remapped(cfg)) - io_apic_eoi(entry->apic, entry->pin); - else - io_apic_eoi(entry->apic, cfg->vector); - } else { - __mask_and_edge_IO_APIC_irq(entry); - __unmask_and_level_IO_APIC_irq(entry); - } - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ack_apic_level(struct irq_data *data) { struct irq_cfg *cfg = data->chip_data; @@ -2552,7 +2612,7 @@ static void ack_apic_level(struct irq_data *data) } } -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_IRQ_REMAP static void ir_ack_apic_edge(struct irq_data *data) { ack_APIC_irq(); @@ -2563,7 +2623,23 @@ static void ir_ack_apic_level(struct irq_data *data) ack_APIC_irq(); eoi_ioapic_irq(data->irq, data->chip_data); } -#endif /* CONFIG_INTR_REMAP */ + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +static void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + +#ifdef CONFIG_SMP + chip->irq_set_affinity = ir_ioapic_set_affinity; +#endif +} +#endif /* CONFIG_IRQ_REMAP */ static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2578,21 +2654,6 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_retrigger = ioapic_retrigger_irq, }; -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .irq_startup = startup_ioapic_irq, - .irq_mask = mask_ioapic_irq, - .irq_unmask = unmask_ioapic_irq, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, - .irq_eoi = ir_ack_apic_level, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_ioapic_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - static inline void init_IO_APIC_traps(void) { struct irq_cfg *cfg; @@ -2944,27 +3005,26 @@ static int __init io_apic_bug_finalize(void) late_initcall(io_apic_bug_finalize); -static void resume_ioapic_id(int ioapic_id) +static void resume_ioapic_id(int 
ioapic_idx) { unsigned long flags; union IO_APIC_reg_00 reg_00; - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic_id, 0); - if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) { - reg_00.bits.ID = mpc_ioapic_id(ioapic_id); - io_apic_write(ioapic_id, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic_idx, 0); + if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { + reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); + io_apic_write(ioapic_idx, 0, reg_00.raw); } raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void ioapic_resume(void) { - int ioapic_id; + int ioapic_idx; - for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--) - resume_ioapic_id(ioapic_id); + for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) + resume_ioapic_id(ioapic_idx); restore_ioapic_entries(); } @@ -3144,45 +3204,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) return 0; } -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static int -ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (get_irte(irq, &irte)) - return -1; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. 
- */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} - -#endif #endif /* CONFIG_SMP */ /* @@ -3200,19 +3221,6 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .irq_unmask = unmask_msi_irq, - .irq_mask = mask_msi_irq, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_msi_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - /* * Map the PCI dev to the corresponding remapping hardware unit * and allocate 'nvec' consecutive interrupt-remapping table entries @@ -3255,7 +3263,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) if (irq_remapped(irq_get_chip_data(irq))) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - chip = &msi_ir_chip; + irq_remap_modify_chip_defaults(chip); } irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3328,7 +3336,7 @@ void native_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) +#ifdef CONFIG_DMAR_TABLE #ifdef CONFIG_SMP static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, @@ -3409,19 +3417,6 @@ static int hpet_msi_set_affinity(struct irq_data *data, #endif /* CONFIG_SMP */ -static struct irq_chip ir_hpet_msi_type = { - .name = "IR-HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_msi_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .irq_unmask = hpet_msi_unmask, @@ -3458,7 +3453,7 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); if (irq_remapped(irq_get_chip_data(irq))) - chip = 
&ir_hpet_msi_type; + irq_remap_modify_chip_defaults(chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; @@ -3566,26 +3561,25 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) return -EINVAL; ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); if (!ret) - setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg, - attr->trigger, attr->polarity); + setup_ioapic_irq(irq, cfg, attr); return ret; } int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr) { - unsigned int id = attr->ioapic, pin = attr->ioapic_pin; + unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; int ret; /* Avoid redundant programming */ - if (test_bit(pin, ioapics[id].pin_programmed)) { + if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { pr_debug("Pin %d-%d already programmed\n", - mpc_ioapic_id(id), pin); + mpc_ioapic_id(ioapic_idx), pin); return 0; } ret = io_apic_setup_irq_pin(irq, node, attr); if (!ret) - set_bit(pin, ioapics[id].pin_programmed); + set_bit(pin, ioapics[ioapic_idx].pin_programmed); return ret; } @@ -3621,7 +3615,6 @@ int get_nr_irqs_gsi(void) return nr_irqs_gsi; } -#ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { int nr; @@ -3641,7 +3634,6 @@ int __init arch_probe_nr_irqs(void) return NR_IRQS_LEGACY; } -#endif int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index b5254ad044a..0787bb3412f 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -200,14 +200,8 @@ void __init default_setup_apic_routing(void) * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && apic == &apic_default) { - struct apic *bigsmp = generic_bigsmp_probe(); - if (bigsmp) { - apic = bigsmp; - printk(KERN_INFO "Overriding APIC driver with %s\n", - apic->name); - } - } 
+ if (!cmdline_apic && apic == &apic_default) + generic_bigsmp_probe(); #endif if (apic->setup_apic_routing) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 34b18594e72..75be00ecfff 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -672,18 +672,11 @@ void __cpuinit uv_cpu_init(void) /* * When NMI is received, print a stack trace. */ -int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) +int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) { unsigned long real_uv_nmi; int bid; - if (reason != DIE_NMIUNKNOWN) - return NOTIFY_OK; - - if (in_crash_kexec) - /* do nothing if entering the crash kernel */ - return NOTIFY_OK; - /* * Each blade has an MMR that indicates when an NMI has been sent * to cpus on the blade. If an NMI is detected, atomically @@ -704,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) } if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NOTIFY_DONE; + return NMI_DONE; __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; @@ -717,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) dump_stack(); spin_unlock(&uv_nmi_lock); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi, - .priority = NMI_LOCAL_LOW_PRIOR - 1, -}; - void uv_register_nmi_notifier(void) { - if (register_die_notifier(&uv_dump_stack_nmi_nb)) + if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) printk(KERN_WARNING "UV NMI handler failed to register\n"); } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6042981d030..fe6eb197f84 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,10 +28,15 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o +ifdef 
CONFIG_PERF_EVENTS +obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +endif + obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 0ed633c5048..6199232161c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) static cpumask_var_t mce_inject_cpumask; -static int mce_raise_notify(struct notifier_block *self, - unsigned long val, void *data) +static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) - return NOTIFY_DONE; + if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) + return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) - raise_exception(m, args->regs); + raise_exception(m, regs); else if (m->status) raise_poll(m); - return NOTIFY_STOP; + return NMI_HANDLED; } -static struct notifier_block mce_raise_nb = { - .notifier_call = mce_raise_notify, - .priority = NMI_LOCAL_NORMAL_PRIOR, -}; - /* Inject mce on current CPU */ static int raise_local(void) { @@ -216,7 +209,8 @@ static int inject_init(void) return -ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; - register_die_notifier(&mce_raise_nb); + register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, + "mce_notify"); return 0; } diff --git 
a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 08363b04212..fce51ad1f36 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) percpu_inc(mce_exception_count); - if (notify_die(DIE_NMI, "machine check", regs, error_code, - 18, SIGKILL) == NOTIFY_STOP) - goto out; if (!banks) goto out; @@ -1140,6 +1137,15 @@ static void mce_start_timer(unsigned long data) add_timer_on(t, smp_processor_id()); } +/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +static void mce_timer_delete_all(void) +{ + int cpu; + + for_each_online_cpu(cpu) + del_timer_sync(&per_cpu(mce_timer, cpu)); +} + static void mce_do_trigger(struct work_struct *work) { call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); @@ -1750,7 +1756,6 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { - del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(__this_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); @@ -1760,16 +1765,15 @@ static void mce_cpu_restart(void *data) /* Reinit MCEs after user configuration changes */ static void mce_restart(void) { + mce_timer_delete_all(); on_each_cpu(mce_cpu_restart, NULL, 1); } /* Toggle features for corrected errors */ -static void mce_disable_ce(void *all) +static void mce_disable_cmci(void *data) { if (!mce_available(__this_cpu_ptr(&cpu_info))) return; - if (all) - del_timer_sync(&__get_cpu_var(mce_timer)); cmci_clear(); } @@ -1852,7 +1856,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, if (mce_ignore_ce ^ !!new) { if (new) { /* disable ce features */ - on_each_cpu(mce_disable_ce, (void *)1, 1); + mce_timer_delete_all(); + on_each_cpu(mce_disable_cmci, NULL, 1); mce_ignore_ce = 1; } else { /* enable ce features */ @@ -1875,7 +1880,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s, if (mce_cmci_disabled ^ !!new) { if (new) 
{ /* disable cmci */ - on_each_cpu(mce_disable_ce, NULL, 1); + on_each_cpu(mce_disable_cmci, NULL, 1); mce_cmci_disabled = 1; } else { /* enable cmci */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 8694ef56459..38e49bc95ff 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); * cmci_discover_lock protects against parallel discovery attempts * which could race against each other. */ -static DEFINE_SPINLOCK(cmci_discover_lock); +static DEFINE_RAW_SPINLOCK(cmci_discover_lock); #define CMCI_THRESHOLD 1 @@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot) int hdr = 0; int i; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; @@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot) WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); if (hdr) printk(KERN_CONT "\n"); } @@ -150,7 +150,7 @@ void cmci_clear(void) if (!cmci_supported(&banks)) return; - spin_lock_irqsave(&cmci_discover_lock, flags); + raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { if (!test_bit(i, __get_cpu_var(mce_banks_owned))) continue; @@ -160,7 +160,7 @@ void cmci_clear(void) wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } - spin_unlock_irqrestore(&cmci_discover_lock, flags); + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } /* diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index cfa62ec090e..640891014b2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -32,6 +32,8 @@ #include <asm/smp.h> #include <asm/alternative.h> +#include "perf_event.h" + #if 0 #undef wrmsrl 
#define wrmsrl(msr, val) \ @@ -43,283 +45,17 @@ do { \ } while (0) #endif -/* - * | NHM/WSM | SNB | - * register ------------------------------- - * | HT | no HT | HT | no HT | - *----------------------------------------- - * offcore | core | core | cpu | core | - * lbr_sel | core | core | cpu | core | - * ld_lat | cpu | core | cpu | core | - *----------------------------------------- - * - * Given that there is a small number of shared regs, - * we can pre-allocate their slot in the per-cpu - * per-core reg tables. - */ -enum extra_reg_type { - EXTRA_REG_NONE = -1, /* not used */ - - EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ - EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ - - EXTRA_REG_MAX /* number of entries needed */ -}; - -struct event_constraint { - union { - unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - u64 idxmsk64; - }; - u64 code; - u64 cmask; - int weight; -}; - -struct amd_nb { - int nb_id; /* NorthBridge id */ - int refcnt; /* reference count */ - struct perf_event *owners[X86_PMC_IDX_MAX]; - struct event_constraint event_constraints[X86_PMC_IDX_MAX]; -}; - -struct intel_percore; - -#define MAX_LBR_ENTRIES 16 - -struct cpu_hw_events { - /* - * Generic x86 PMC bits - */ - struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int enabled; - - int n_events; - int n_added; - int n_txn; - int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ - u64 tags[X86_PMC_IDX_MAX]; - struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ - - unsigned int group_flag; - - /* - * Intel DebugStore bits - */ - struct debug_store *ds; - u64 pebs_enabled; - - /* - * Intel LBR bits - */ - int lbr_users; - void *lbr_context; - struct perf_branch_stack lbr_stack; - struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; - - /* - * manage shared (per-core, per-cpu) registers - * used on Intel NHM/WSM/SNB - */ - 
struct intel_shared_regs *shared_regs; - - /* - * AMD specific bits - */ - struct amd_nb *amd_nb; -}; - -#define __EVENT_CONSTRAINT(c, n, m, w) {\ - { .idxmsk64 = (n) }, \ - .code = (c), \ - .cmask = (m), \ - .weight = (w), \ -} - -#define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) - -/* - * Constraint on the Event code. - */ -#define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) - -/* - * Constraint on the Event code + UMask + fixed-mask - * - * filter mask to validate fixed counter events. - * the following filters disqualify for fixed counters: - * - inv - * - edge - * - cnt-mask - * The other filters are supported by fixed counters. - * The any-thread option is supported starting with v3. - */ -#define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) - -/* - * Constraint on the Event code + UMask - */ -#define INTEL_UEVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) - -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) - -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->weight; (e)++) - -/* - * Per register state. - */ -struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ - u64 config; /* extra MSR config */ - u64 reg; /* extra MSR number */ - atomic_t ref; /* reference count */ -}; - -/* - * Extra registers for specific events. - * - * Some events need large masks and require external MSRs. - * Those extra MSRs end up being shared for all events on - * a PMU and sometimes between PMU of sibling HT threads. - * In either case, the kernel needs to handle conflicting - * accesses to those extra, shared, regs. The data structure - * to manage those registers is stored in cpu_hw_event. 
- */ -struct extra_reg { - unsigned int event; - unsigned int msr; - u64 config_mask; - u64 valid_mask; - int idx; /* per_xxx->regs[] reg index */ -}; - -#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i \ - } - -#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ - EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) - -#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) - -union perf_capabilities { - struct { - u64 lbr_format : 6; - u64 pebs_trap : 1; - u64 pebs_arch_reg : 1; - u64 pebs_format : 4; - u64 smm_freeze : 1; - }; - u64 capabilities; -}; - -/* - * struct x86_pmu - generic x86 pmu - */ -struct x86_pmu { - /* - * Generic x86 PMC bits - */ - const char *name; - int version; - int (*handle_irq)(struct pt_regs *); - void (*disable_all)(void); - void (*enable_all)(int added); - void (*enable)(struct perf_event *); - void (*disable)(struct perf_event *); - int (*hw_config)(struct perf_event *event); - int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); - unsigned eventsel; - unsigned perfctr; - u64 (*event_map)(int); - int max_events; - int num_counters; - int num_counters_fixed; - int cntval_bits; - u64 cntval_mask; - int apic; - u64 max_period; - struct event_constraint * - (*get_event_constraints)(struct cpu_hw_events *cpuc, - struct perf_event *event); - - void (*put_event_constraints)(struct cpu_hw_events *cpuc, - struct perf_event *event); - struct event_constraint *event_constraints; - void (*quirks)(void); - int perfctr_second_write; - - int (*cpu_prepare)(int cpu); - void (*cpu_starting)(int cpu); - void (*cpu_dying)(int cpu); - void (*cpu_dead)(int cpu); - - /* - * Intel Arch Perfmon v2+ - */ - u64 intel_ctrl; - union perf_capabilities intel_cap; +struct x86_pmu x86_pmu __read_mostly; - /* - * Intel DebugStore bits - */ - int bts, pebs; - int bts_active, pebs_active; - int pebs_record_size; - void 
(*drain_pebs)(struct pt_regs *regs); - struct event_constraint *pebs_constraints; - - /* - * Intel LBR - */ - unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ - int lbr_nr; /* hardware stack size */ - - /* - * Extra registers for events - */ - struct extra_reg *extra_regs; - unsigned int er_flags; -}; - -#define ERF_NO_HT_SHARING 1 -#define ERF_HAS_RSP_1 2 - -static struct x86_pmu x86_pmu __read_mostly; - -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static int x86_perf_event_set_period(struct perf_event *event); - -/* - * Generalized hw caching related hw_event table, filled - * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'hw_event makes no sense on - * this CPU', any other value means the raw hw_event - * ID. - */ - -#define C(x) PERF_COUNT_HW_CACHE_##x - -static u64 __read_mostly hw_cache_event_ids +u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static u64 __read_mostly hw_cache_extra_regs +u64 __read_mostly hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; @@ -329,8 +65,7 @@ static u64 __read_mostly hw_cache_extra_regs * Can only be executed on the CPU where the event is active. * Returns the delta events processed. */ -static u64 -x86_perf_event_update(struct perf_event *event) +u64 x86_perf_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int shift = 64 - x86_pmu.cntval_bits; @@ -373,30 +108,6 @@ again: return new_raw_count; } -static inline int x86_pmu_addr_offset(int index) -{ - int offset; - - /* offset = X86_FEATURE_PERFCTR_CORE ? 
index << 1 : index */ - alternative_io(ASM_NOP2, - "shll $1, %%eax", - X86_FEATURE_PERFCTR_CORE, - "=a" (offset), - "a" (index)); - - return offset; -} - -static inline unsigned int x86_pmu_config_addr(int index) -{ - return x86_pmu.eventsel + x86_pmu_addr_offset(index); -} - -static inline unsigned int x86_pmu_event_addr(int index) -{ - return x86_pmu.perfctr + x86_pmu_addr_offset(index); -} - /* * Find and validate any extra registers to set up. */ @@ -532,9 +243,6 @@ msr_fail: return false; } -static void reserve_ds_buffers(void); -static void release_ds_buffers(void); - static void hw_perf_event_destroy(struct perf_event *event) { if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { @@ -583,7 +291,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) return x86_pmu_extra_regs(val, event); } -static int x86_setup_perfctr(struct perf_event *event) +int x86_setup_perfctr(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; @@ -647,7 +355,7 @@ static int x86_setup_perfctr(struct perf_event *event) return 0; } -static int x86_pmu_hw_config(struct perf_event *event) +int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { int precise = 0; @@ -723,7 +431,7 @@ static int __x86_pmu_event_init(struct perf_event *event) return x86_pmu.hw_config(event); } -static void x86_pmu_disable_all(void) +void x86_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -758,15 +466,7 @@ static void x86_pmu_disable(struct pmu *pmu) x86_pmu.disable_all(); } -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, - u64 enable_mask) -{ - if (hwc->extra_reg.reg) - wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); - wrmsrl(hwc->config_base, hwc->config | enable_mask); -} - -static void x86_pmu_enable_all(int added) +void x86_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int 
idx; @@ -788,7 +488,7 @@ static inline int is_x86_event(struct perf_event *event) return event->pmu == &pmu; } -static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; @@ -959,7 +659,6 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, } static void x86_pmu_start(struct perf_event *event, int flags); -static void x86_pmu_stop(struct perf_event *event, int flags); static void x86_pmu_enable(struct pmu *pmu) { @@ -1031,21 +730,13 @@ static void x86_pmu_enable(struct pmu *pmu) x86_pmu.enable_all(added); } -static inline void x86_pmu_disable_event(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - wrmsrl(hwc->config_base, hwc->config); -} - static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* * Set the next IRQ period, based on the hwc->period_left value. 
* To be called with the event disabled in hw: */ -static int -x86_perf_event_set_period(struct perf_event *event) +int x86_perf_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); @@ -1105,7 +796,7 @@ x86_perf_event_set_period(struct perf_event *event) return ret; } -static void x86_pmu_enable_event(struct perf_event *event) +void x86_pmu_enable_event(struct perf_event *event) { if (__this_cpu_read(cpu_hw_events.enabled)) __x86_pmu_enable_event(&event->hw, @@ -1244,7 +935,7 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event, int flags) +void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -1297,7 +988,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); } -static int x86_pmu_handle_irq(struct pt_regs *regs) +int x86_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_events *cpuc; @@ -1367,109 +1058,28 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -struct pmu_nmi_state { - unsigned int marked; - int handled; -}; - -static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); - static int __kprobes -perf_event_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - unsigned int this_nmi; - int handled; - if (!atomic_read(&active_events)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - break; - case DIE_NMIUNKNOWN: - this_nmi = percpu_read(irq_stat.__nmi_count); - if (this_nmi != __this_cpu_read(pmu_nmi.marked)) - /* let the kernel handle the unknown nmi */ - return NOTIFY_DONE; - /* - * This one is a PMU back-to-back nmi. 
Two events - * trigger 'simultaneously' raising two back-to-back - * NMIs. If the first NMI handles both, the latter - * will be empty and daze the CPU. So, we drop it to - * avoid false-positive 'unknown nmi' messages. - */ - return NOTIFY_STOP; - default: - return NOTIFY_DONE; - } - - handled = x86_pmu.handle_irq(args->regs); - if (!handled) - return NOTIFY_DONE; - - this_nmi = percpu_read(irq_stat.__nmi_count); - if ((handled > 1) || - /* the next nmi could be a back-to-back nmi */ - ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && - (__this_cpu_read(pmu_nmi.handled) > 1))) { - /* - * We could have two subsequent back-to-back nmis: The - * first handles more than one counter, the 2nd - * handles only one counter and the 3rd handles no - * counter. - * - * This is the 2nd nmi because the previous was - * handling more than one counter. We will mark the - * next (3rd) and then drop it if unhandled. - */ - __this_cpu_write(pmu_nmi.marked, this_nmi + 1); - __this_cpu_write(pmu_nmi.handled, handled); - } + return NMI_DONE; - return NOTIFY_STOP; + return x86_pmu.handle_irq(regs); } -static __read_mostly struct notifier_block perf_event_nmi_notifier = { - .notifier_call = perf_event_nmi_handler, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - -static struct event_constraint unconstrained; -static struct event_constraint emptyconstraint; - -static struct event_constraint * -x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct event_constraint *c; - - if (x86_pmu.event_constraints) { - for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) - return c; - } - } - - return &unconstrained; -} - -#include "perf_event_amd.c" -#include "perf_event_p6.c" -#include "perf_event_p4.c" -#include "perf_event_intel_lbr.c" -#include "perf_event_intel_ds.c" -#include "perf_event_intel.c" +struct event_constraint emptyconstraint; +struct event_constraint unconstrained; static int __cpuinit 
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); int ret = NOTIFY_OK; switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: + cpuc->kfree_on_online = NULL; if (x86_pmu.cpu_prepare) ret = x86_pmu.cpu_prepare(cpu); break; @@ -1479,6 +1089,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) x86_pmu.cpu_starting(cpu); break; + case CPU_ONLINE: + kfree(cpuc->kfree_on_online); + break; + case CPU_DYING: if (x86_pmu.cpu_dying) x86_pmu.cpu_dying(cpu); @@ -1557,7 +1171,7 @@ static int __init init_hw_perf_events(void) ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); - register_die_notifier(&perf_event_nmi_notifier); + register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h new file mode 100644 index 00000000000..b9698d40ac4 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event.h @@ -0,0 +1,505 @@ +/* + * Performance events x86 architecture header + * + * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> + * Copyright (C) 2009 Google, Inc., Stephane Eranian + * + * For licencing details see kernel-base/COPYING + */ + +#include <linux/perf_event.h> + +/* + * | NHM/WSM | SNB | + * register ------------------------------- + * | HT | no HT | HT | no HT | + *----------------------------------------- + * offcore | core | core | cpu | core | + * lbr_sel | core | core | cpu | 
core | + * ld_lat | cpu | core | cpu | core | + *----------------------------------------- + * + * Given that there is a small number of shared regs, + * we can pre-allocate their slot in the per-cpu + * per-core reg tables. + */ +enum extra_reg_type { + EXTRA_REG_NONE = -1, /* not used */ + + EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ + EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + + EXTRA_REG_MAX /* number of entries needed */ +}; + +struct event_constraint { + union { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 idxmsk64; + }; + u64 code; + u64 cmask; + int weight; +}; + +struct amd_nb { + int nb_id; /* NorthBridge id */ + int refcnt; /* reference count */ + struct perf_event *owners[X86_PMC_IDX_MAX]; + struct event_constraint event_constraints[X86_PMC_IDX_MAX]; +}; + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +}; + +/* + * Per register state. + */ +struct er_account { + raw_spinlock_t lock; /* per-core: protect structure */ + u64 config; /* extra MSR config */ + u64 reg; /* extra MSR number */ + atomic_t ref; /* reference count */ +}; + +/* + * Per core/cpu state + * + * Used to coordinate shared registers between HT threads or + * among events on a single PMU. 
+ */ +struct intel_shared_regs { + struct er_account regs[EXTRA_REG_MAX]; + int refcnt; /* per-core: #HT threads */ + unsigned core_id; /* per-core: core id */ +}; + +#define MAX_LBR_ENTRIES 16 + +struct cpu_hw_events { + /* + * Generic x86 PMC bits + */ + struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int enabled; + + int n_events; + int n_added; + int n_txn; + int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + u64 tags[X86_PMC_IDX_MAX]; + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + + unsigned int group_flag; + + /* + * Intel DebugStore bits + */ + struct debug_store *ds; + u64 pebs_enabled; + + /* + * Intel LBR bits + */ + int lbr_users; + void *lbr_context; + struct perf_branch_stack lbr_stack; + struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + + /* + * Intel host/guest exclude bits + */ + u64 intel_ctrl_guest_mask; + u64 intel_ctrl_host_mask; + struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; + + /* + * manage shared (per-core, per-cpu) registers + * used on Intel NHM/WSM/SNB + */ + struct intel_shared_regs *shared_regs; + + /* + * AMD specific bits + */ + struct amd_nb *amd_nb; + + void *kfree_on_online; +}; + +#define __EVENT_CONSTRAINT(c, n, m, w) {\ + { .idxmsk64 = (n) }, \ + .code = (c), \ + .cmask = (m), \ + .weight = (w), \ +} + +#define EVENT_CONSTRAINT(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) + +/* + * Constraint on the Event code. + */ +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) + +/* + * Constraint on the Event code + UMask + fixed-mask + * + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. 
+ * The any-thread option is supported starting with v3. + */ +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) + +/* + * Constraint on the Event code + UMask + */ +#define INTEL_UEVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + +#define EVENT_CONSTRAINT_END \ + EVENT_CONSTRAINT(0, 0, 0) + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->weight; (e)++) + +/* + * Extra registers for specific events. + * + * Some events need large masks and require external MSRs. + * Those extra MSRs end up being shared for all events on + * a PMU and sometimes between PMU of sibling HT threads. + * In either case, the kernel needs to handle conflicting + * accesses to those extra, shared, regs. The data structure + * to manage those registers is stored in cpu_hw_event. + */ +struct extra_reg { + unsigned int event; + unsigned int msr; + u64 config_mask; + u64 valid_mask; + int idx; /* per_xxx->regs[] reg index */ +}; + +#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i \ + } + +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) + +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) + +union perf_capabilities { + struct { + u64 lbr_format:6; + u64 pebs_trap:1; + u64 pebs_arch_reg:1; + u64 pebs_format:4; + u64 smm_freeze:1; + }; + u64 capabilities; +}; + +/* + * struct x86_pmu - generic x86 pmu + */ +struct x86_pmu { + /* + * Generic x86 PMC bits + */ + const char *name; + int version; + int (*handle_irq)(struct pt_regs *); + void (*disable_all)(void); + void (*enable_all)(int added); + void (*enable)(struct perf_event *); + void (*disable)(struct perf_event *); + int (*hw_config)(struct perf_event *event); + int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); + unsigned eventsel; + unsigned 
perfctr; + u64 (*event_map)(int); + int max_events; + int num_counters; + int num_counters_fixed; + int cntval_bits; + u64 cntval_mask; + int apic; + u64 max_period; + struct event_constraint * + (*get_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + + void (*put_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + struct event_constraint *event_constraints; + void (*quirks)(void); + int perfctr_second_write; + + int (*cpu_prepare)(int cpu); + void (*cpu_starting)(int cpu); + void (*cpu_dying)(int cpu); + void (*cpu_dead)(int cpu); + + /* + * Intel Arch Perfmon v2+ + */ + u64 intel_ctrl; + union perf_capabilities intel_cap; + + /* + * Intel DebugStore bits + */ + int bts, pebs; + int bts_active, pebs_active; + int pebs_record_size; + void (*drain_pebs)(struct pt_regs *regs); + struct event_constraint *pebs_constraints; + + /* + * Intel LBR + */ + unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ + int lbr_nr; /* hardware stack size */ + + /* + * Extra registers for events + */ + struct extra_reg *extra_regs; + unsigned int er_flags; + + /* + * Intel host/guest support (KVM) + */ + struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); +}; + +#define ERF_NO_HT_SHARING 1 +#define ERF_HAS_RSP_1 2 + +extern struct x86_pmu x86_pmu __read_mostly; + +DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +int x86_perf_event_set_period(struct perf_event *event); + +/* + * Generalized hw caching related hw_event table, filled + * in on a per model basis. A value of 0 means + * 'not supported', -1 means 'hw_event makes no sense on + * this CPU', any other value means the raw hw_event + * ID. 
+ */ + +#define C(x) PERF_COUNT_HW_CACHE_##x + +extern u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +extern u64 __read_mostly hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +u64 x86_perf_event_update(struct perf_event *event); + +static inline int x86_pmu_addr_offset(int index) +{ + int offset; + + /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ + alternative_io(ASM_NOP2, + "shll $1, %%eax", + X86_FEATURE_PERFCTR_CORE, + "=a" (offset), + "a" (index)); + + return offset; +} + +static inline unsigned int x86_pmu_config_addr(int index) +{ + return x86_pmu.eventsel + x86_pmu_addr_offset(index); +} + +static inline unsigned int x86_pmu_event_addr(int index) +{ + return x86_pmu.perfctr + x86_pmu_addr_offset(index); +} + +int x86_setup_perfctr(struct perf_event *event); + +int x86_pmu_hw_config(struct perf_event *event); + +void x86_pmu_disable_all(void); + +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, + u64 enable_mask) +{ + if (hwc->extra_reg.reg) + wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + wrmsrl(hwc->config_base, hwc->config | enable_mask); +} + +void x86_pmu_enable_all(int added); + +int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); + +void x86_pmu_stop(struct perf_event *event, int flags); + +static inline void x86_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); +} + +void x86_pmu_enable_event(struct perf_event *event); + +int x86_pmu_handle_irq(struct pt_regs *regs); + +extern struct event_constraint emptyconstraint; + +extern struct event_constraint unconstrained; + +#ifdef CONFIG_CPU_SUP_AMD + +int amd_pmu_init(void); + +#else /* CONFIG_CPU_SUP_AMD */ + +static inline int amd_pmu_init(void) +{ + return 0; +} + +#endif /* CONFIG_CPU_SUP_AMD */ + +#ifdef 
CONFIG_CPU_SUP_INTEL + +int intel_pmu_save_and_restart(struct perf_event *event); + +struct event_constraint * +x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event); + +struct intel_shared_regs *allocate_shared_regs(int cpu); + +int intel_pmu_init(void); + +void init_debug_store_on_cpu(int cpu); + +void fini_debug_store_on_cpu(int cpu); + +void release_ds_buffers(void); + +void reserve_ds_buffers(void); + +extern struct event_constraint bts_constraint; + +void intel_pmu_enable_bts(u64 config); + +void intel_pmu_disable_bts(void); + +int intel_pmu_drain_bts_buffer(void); + +extern struct event_constraint intel_core2_pebs_event_constraints[]; + +extern struct event_constraint intel_atom_pebs_event_constraints[]; + +extern struct event_constraint intel_nehalem_pebs_event_constraints[]; + +extern struct event_constraint intel_westmere_pebs_event_constraints[]; + +extern struct event_constraint intel_snb_pebs_event_constraints[]; + +struct event_constraint *intel_pebs_constraints(struct perf_event *event); + +void intel_pmu_pebs_enable(struct perf_event *event); + +void intel_pmu_pebs_disable(struct perf_event *event); + +void intel_pmu_pebs_enable_all(void); + +void intel_pmu_pebs_disable_all(void); + +void intel_ds_init(void); + +void intel_pmu_lbr_reset(void); + +void intel_pmu_lbr_enable(struct perf_event *event); + +void intel_pmu_lbr_disable(struct perf_event *event); + +void intel_pmu_lbr_enable_all(void); + +void intel_pmu_lbr_disable_all(void); + +void intel_pmu_lbr_read(void); + +void intel_pmu_lbr_init_core(void); + +void intel_pmu_lbr_init_nhm(void); + +void intel_pmu_lbr_init_atom(void); + +int p4_pmu_init(void); + +int p6_pmu_init(void); + +#else /* CONFIG_CPU_SUP_INTEL */ + +static inline void reserve_ds_buffers(void) +{ +} + +static inline void release_ds_buffers(void) +{ +} + +static inline int intel_pmu_init(void) +{ + return 0; +} + +static inline struct intel_shared_regs *allocate_shared_regs(int cpu) +{ + return NULL; +} + 
+#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 941caa2e449..aeefd45697a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -1,4 +1,10 @@ -#ifdef CONFIG_CPU_SUP_AMD +#include <linux/perf_event.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <asm/apicdef.h> + +#include "perf_event.h" static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -132,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.exclude_host && event->attr.exclude_guest) + /* + * When HO == GO == 1 the hardware treats that as GO == HO == 0 + * and will count in both modes. We don't want to count in that + * case so we emulate no-counting by setting US = OS = 0. + */ + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); + else if (event->attr.exclude_host) + event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; + else if (event->attr.exclude_guest) + event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; + if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -350,7 +369,7 @@ static void amd_pmu_cpu_starting(int cpu) continue; if (nb->nb_id == nb_id) { - kfree(cpuc->amd_nb); + cpuc->kfree_on_online = cpuc->amd_nb; cpuc->amd_nb = nb; break; } @@ -392,7 +411,7 @@ static __initconst const struct x86_pmu amd_pmu = { .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, + .num_counters = AMD64_NUM_COUNTERS, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, .apic = 1, @@ -556,7 +575,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .perfctr = MSR_F15H_PERF_CTR, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 6, + .num_counters = AMD64_NUM_COUNTERS_F15H, .cntval_bits = 48, .cntval_mask = (1ULL << 48) - 1, 
.apic = 1, @@ -573,7 +592,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { #endif }; -static __init int amd_pmu_init(void) +__init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) @@ -602,12 +621,3 @@ static __init int amd_pmu_init(void) return 0; } - -#else /* CONFIG_CPU_SUP_AMD */ - -static int amd_pmu_init(void) -{ - return 0; -} - -#endif diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c new file mode 100644 index 00000000000..ab6343d2182 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -0,0 +1,294 @@ +/* + * Performance events - AMD IBS + * + * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter + * + * For licencing details see kernel-base/COPYING + */ + +#include <linux/perf_event.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include <asm/apic.h> + +static u32 ibs_caps; + +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) + +static struct pmu perf_ibs; + +static int perf_ibs_init(struct perf_event *event) +{ + if (perf_ibs.type != event->attr.type) + return -ENOENT; + return 0; +} + +static int perf_ibs_add(struct perf_event *event, int flags) +{ + return 0; +} + +static void perf_ibs_del(struct perf_event *event, int flags) +{ +} + +static struct pmu perf_ibs = { + .event_init= perf_ibs_init, + .add= perf_ibs_add, + .del= perf_ibs_del, +}; + +static __init int perf_event_ibs_init(void) +{ + if (!ibs_caps) + return -ENODEV; /* ibs not supported by the cpu */ + + perf_pmu_register(&perf_ibs, "ibs", -1); + printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); + + return 0; +} + +#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ + +static __init int perf_event_ibs_init(void) { return 0; } + +#endif + +/* IBS - apic initialization, for perf and oprofile */ + +static __init u32 __get_ibs_caps(void) +{ + u32 caps; + unsigned int max_level; + + if 
(!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_DEFAULT; + + caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_DEFAULT; + + return caps; +} + +u32 get_ibs_caps(void) +{ + return ibs_caps; +} + +EXPORT_SYMBOL(get_ibs_caps); + +static inline int get_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); +} + +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + +/* + * Check and reserve APIC extended interrupt LVT offset for IBS if available. + */ +static inline int ibs_eilvt_valid(void) +{ + int offset; + u64 val; + int valid = 0; + + preempt_disable(); + + rdmsrl(MSR_AMD64_IBSCTL, val); + offset = val & IBSCTL_LVT_OFFSET_MASK; + + if (!(val & IBSCTL_LVT_OFFSET_VALID)) { + pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + if (!get_eilvt(offset)) { + pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", + smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); + goto out; + } + + valid = 1; +out: + preempt_enable(); + + return valid; +} + +static int setup_ibs_ctl(int ibs_eilvt_off) +{ + struct pci_dev *cpu_cfg; + int nodes; + u32 value = 0; + + nodes = 0; + cpu_cfg = NULL; + do { + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_10H_NB_MISC, + cpu_cfg); + if (!cpu_cfg) + break; + ++nodes; + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off + | IBSCTL_LVT_OFFSET_VALID); + pci_read_config_dword(cpu_cfg, IBSCTL, &value); + if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { + pci_dev_put(cpu_cfg); + printk(KERN_DEBUG "Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x\n", value); + return -EINVAL; + } + } while (1); + + if (!nodes) { + printk(KERN_DEBUG 
"No CPU node configured for IBS\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This runs only on the current cpu. We try to find an LVT offset and + * setup the local APIC. For this we must disable preemption. On + * success we initialize all nodes with this offset. This updates then + * the offset in the IBS_CTL per-node msr. The per-core APIC setup of + * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that + * is using the new offset. + */ +static int force_ibs_eilvt_setup(void) +{ + int offset; + int ret; + + preempt_disable(); + /* find the next free available EILVT entry, skip offset 0 */ + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; + } + preempt_enable(); + + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } + + ret = setup_ibs_ctl(offset); + if (ret) + goto out; + + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); + pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); + + return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; +} + +static inline int get_ibs_lvt_offset(void) +{ + u64 val; + + rdmsrl(MSR_AMD64_IBSCTL, val); + if (!(val & IBSCTL_LVT_OFFSET_VALID)) + return -EINVAL; + + return val & IBSCTL_LVT_OFFSET_MASK; +} + +static void setup_APIC_ibs(void *dummy) +{ + int offset; + + offset = get_ibs_lvt_offset(); + if (offset < 0) + goto failed; + + if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) + return; +failed: + pr_warn("perf: IBS APIC setup failed on cpu #%d\n", + smp_processor_id()); +} + +static void clear_APIC_ibs(void *dummy) +{ + int offset; + + offset = get_ibs_lvt_offset(); + if (offset >= 0) + setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); +} + +static int __cpuinit +perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + switch (action & 
~CPU_TASKS_FROZEN) { + case CPU_STARTING: + setup_APIC_ibs(NULL); + break; + case CPU_DYING: + clear_APIC_ibs(NULL); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static __init int amd_ibs_init(void) +{ + u32 caps; + int ret; + + caps = __get_ibs_caps(); + if (!caps) + return -ENODEV; /* ibs not supported by the cpu */ + + if (!ibs_eilvt_valid()) { + ret = force_ibs_eilvt_setup(); + if (ret) { + pr_err("Failed to setup IBS, %d\n", ret); + return ret; + } + } + + get_online_cpus(); + ibs_caps = caps; + /* make ibs_caps visible to other cpus: */ + smp_mb(); + perf_cpu_notifier(perf_ibs_cpu_notifier); + smp_call_function(setup_APIC_ibs, NULL, 1); + put_online_cpus(); + + return perf_event_ibs_init(); +} + +/* Since we need the pci subsystem to init ibs we can't do this earlier: */ +device_initcall(amd_ibs_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f88af2c2a56..e09ca20e86e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,16 +1,19 @@ -#ifdef CONFIG_CPU_SUP_INTEL - /* * Per core/cpu state * * Used to coordinate shared registers between HT threads or * among events on a single PMU. */ -struct intel_shared_regs { - struct er_account regs[EXTRA_REG_MAX]; - int refcnt; /* per-core: #HT threads */ - unsigned core_id; /* per-core: core id */ -}; + +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/slab.h> + +#include <asm/hardirq.h> +#include <asm/apic.h> + +#include "perf_event.h" /* * Intel PerfMon, used on Core and later. 
@@ -746,7 +749,8 @@ static void intel_pmu_enable_all(int added) intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, + x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { struct perf_event *event = @@ -869,6 +873,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -876,6 +881,9 @@ static void intel_pmu_disable_event(struct perf_event *event) return; } + cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); + cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -921,6 +929,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -930,6 +939,11 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } + if (event->attr.exclude_host) + cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); + if (event->attr.exclude_guest) + cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc); return; @@ -945,7 +959,7 @@ static void intel_pmu_enable_event(struct perf_event *event) * Save and restart an expired event. 
Called by NMI contexts, * so it has to be careful about preempting normal event ops: */ -static int intel_pmu_save_and_restart(struct perf_event *event) +int intel_pmu_save_and_restart(struct perf_event *event) { x86_perf_event_update(event); return x86_perf_event_set_period(event); @@ -1197,6 +1211,21 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc, return c; } +struct event_constraint * +x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct event_constraint *c; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &unconstrained; +} + static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -1284,12 +1313,84 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } +struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) +{ + if (x86_pmu.guest_get_msrs) + return x86_pmu.guest_get_msrs(nr); + *nr = 0; + return NULL; +} +EXPORT_SYMBOL_GPL(perf_guest_get_msrs); + +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + + arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; + arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; + arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; + + *nr = 1; + return arr; +} + +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_event *event = cpuc->events[idx]; + + arr[idx].msr = x86_pmu_config_addr(idx); + arr[idx].host = arr[idx].guest = 0; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + arr[idx].host = 
arr[idx].guest = + event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; + + if (event->attr.exclude_host) + arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + else if (event->attr.exclude_guest) + arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + } + + *nr = x86_pmu.num_counters; + return arr; +} + +static void core_pmu_enable_event(struct perf_event *event) +{ + if (!event->attr.exclude_host) + x86_pmu_enable_event(event); +} + +static void core_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; + + if (!test_bit(idx, cpuc->active_mask) || + cpuc->events[idx]->attr.exclude_host) + continue; + + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); + } +} + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, .hw_config = x86_pmu_hw_config, .schedule_events = x86_schedule_events, @@ -1307,9 +1408,10 @@ static __initconst const struct x86_pmu core_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, }; -static struct intel_shared_regs *allocate_shared_regs(int cpu) +struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; int i; @@ -1362,7 +1464,7 @@ static void intel_pmu_cpu_starting(int cpu) pc = per_cpu(cpu_hw_events, i).shared_regs; if (pc && pc->core_id == core_id) { - kfree(cpuc->shared_regs); + cpuc->kfree_on_online = cpuc->shared_regs; cpuc->shared_regs = pc; break; } @@ -1413,6 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_prepare = 
intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, + .guest_get_msrs = intel_guest_get_msrs, }; static void intel_clovertown_quirks(void) @@ -1441,7 +1544,7 @@ static void intel_clovertown_quirks(void) x86_pmu.pebs_constraints = NULL; } -static __init int intel_pmu_init(void) +__init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -1597,7 +1700,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_snb_event_constraints; - x86_pmu.pebs_constraints = intel_snb_pebs_events; + x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; @@ -1628,16 +1731,3 @@ static __init int intel_pmu_init(void) } return 0; } - -#else /* CONFIG_CPU_SUP_INTEL */ - -static int intel_pmu_init(void) -{ - return 0; -} - -static struct intel_shared_regs *allocate_shared_regs(int cpu) -{ - return NULL; -} -#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 1b1ef3addcf..c0d238f49db 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -1,7 +1,10 @@ -#ifdef CONFIG_CPU_SUP_INTEL +#include <linux/bitops.h> +#include <linux/types.h> +#include <linux/slab.h> -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 4 +#include <asm/perf_event.h> + +#include "perf_event.h" /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -37,24 +40,7 @@ struct pebs_record_nhm { u64 status, dla, dse, lat; }; -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. 
- */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - -static void init_debug_store_on_cpu(int cpu) +void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -66,7 +52,7 @@ static void init_debug_store_on_cpu(int cpu) (u32)((u64)(unsigned long)ds >> 32)); } -static void fini_debug_store_on_cpu(int cpu) +void fini_debug_store_on_cpu(int cpu) { if (!per_cpu(cpu_hw_events, cpu).ds) return; @@ -175,7 +161,7 @@ static void release_ds_buffer(int cpu) kfree(ds); } -static void release_ds_buffers(void) +void release_ds_buffers(void) { int cpu; @@ -194,7 +180,7 @@ static void release_ds_buffers(void) put_online_cpus(); } -static void reserve_ds_buffers(void) +void reserve_ds_buffers(void) { int bts_err = 0, pebs_err = 0; int cpu; @@ -260,10 +246,10 @@ static void reserve_ds_buffers(void) * BTS */ -static struct event_constraint bts_constraint = +struct event_constraint bts_constraint = EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); -static void intel_pmu_enable_bts(u64 config) +void intel_pmu_enable_bts(u64 config) { unsigned long debugctlmsr; @@ -282,7 +268,7 @@ static void intel_pmu_enable_bts(u64 config) update_debugctlmsr(debugctlmsr); } -static void intel_pmu_disable_bts(void) +void intel_pmu_disable_bts(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); unsigned long debugctlmsr; @@ -299,7 +285,7 @@ static void intel_pmu_disable_bts(void) update_debugctlmsr(debugctlmsr); } -static int intel_pmu_drain_bts_buffer(void) +int intel_pmu_drain_bts_buffer(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -361,7 +347,7 @@ static int intel_pmu_drain_bts_buffer(void) /* * PEBS */ -static struct event_constraint 
intel_core2_pebs_event_constraints[] = { +struct event_constraint intel_core2_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ @@ -370,14 +356,14 @@ static struct event_constraint intel_core2_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct event_constraint intel_atom_pebs_event_constraints[] = { +struct event_constraint intel_atom_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ EVENT_CONSTRAINT_END }; -static struct event_constraint intel_nehalem_pebs_event_constraints[] = { +struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ @@ -392,7 +378,7 @@ static struct event_constraint intel_nehalem_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct event_constraint intel_westmere_pebs_event_constraints[] = { +struct event_constraint intel_westmere_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ @@ -407,7 +393,7 @@ static struct event_constraint intel_westmere_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct event_constraint intel_snb_pebs_events[] = { +struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* 
UOPS_RETIRED.RETIRE_SLOTS */ @@ -428,8 +414,7 @@ static struct event_constraint intel_snb_pebs_events[] = { EVENT_CONSTRAINT_END }; -static struct event_constraint * -intel_pebs_constraints(struct perf_event *event) +struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -446,7 +431,7 @@ intel_pebs_constraints(struct perf_event *event) return &emptyconstraint; } -static void intel_pmu_pebs_enable(struct perf_event *event) +void intel_pmu_pebs_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -460,7 +445,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event) intel_pmu_lbr_enable(event); } -static void intel_pmu_pebs_disable(struct perf_event *event) +void intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -475,7 +460,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event) intel_pmu_lbr_disable(event); } -static void intel_pmu_pebs_enable_all(void) +void intel_pmu_pebs_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -483,7 +468,7 @@ static void intel_pmu_pebs_enable_all(void) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); } -static void intel_pmu_pebs_disable_all(void) +void intel_pmu_pebs_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -576,8 +561,6 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 0; } -static int intel_pmu_save_and_restart(struct perf_event *event); - static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { @@ -716,7 +699,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * BTS, PEBS probe and setup */ -static void intel_ds_init(void) +void intel_ds_init(void) { /* * No support for 32bit formats @@ -749,15 +732,3 @@ static void 
intel_ds_init(void) } } } - -#else /* CONFIG_CPU_SUP_INTEL */ - -static void reserve_ds_buffers(void) -{ -} - -static void release_ds_buffers(void) -{ -} - -#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d202c1bece1..3fab3de3ce9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -1,4 +1,10 @@ -#ifdef CONFIG_CPU_SUP_INTEL +#include <linux/perf_event.h> +#include <linux/types.h> + +#include <asm/perf_event.h> +#include <asm/msr.h> + +#include "perf_event.h" enum { LBR_FORMAT_32 = 0x00, @@ -48,7 +54,7 @@ static void intel_pmu_lbr_reset_64(void) } } -static void intel_pmu_lbr_reset(void) +void intel_pmu_lbr_reset(void) { if (!x86_pmu.lbr_nr) return; @@ -59,7 +65,7 @@ static void intel_pmu_lbr_reset(void) intel_pmu_lbr_reset_64(); } -static void intel_pmu_lbr_enable(struct perf_event *event) +void intel_pmu_lbr_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -81,7 +87,7 @@ static void intel_pmu_lbr_enable(struct perf_event *event) cpuc->lbr_users++; } -static void intel_pmu_lbr_disable(struct perf_event *event) +void intel_pmu_lbr_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -95,7 +101,7 @@ static void intel_pmu_lbr_disable(struct perf_event *event) __intel_pmu_lbr_disable(); } -static void intel_pmu_lbr_enable_all(void) +void intel_pmu_lbr_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -103,7 +109,7 @@ static void intel_pmu_lbr_enable_all(void) __intel_pmu_lbr_enable(); } -static void intel_pmu_lbr_disable_all(void) +void intel_pmu_lbr_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -178,7 +184,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) cpuc->lbr_stack.nr = i; } -static void intel_pmu_lbr_read(void) +void 
intel_pmu_lbr_read(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -191,7 +197,7 @@ static void intel_pmu_lbr_read(void) intel_pmu_lbr_read_64(cpuc); } -static void intel_pmu_lbr_init_core(void) +void intel_pmu_lbr_init_core(void) { x86_pmu.lbr_nr = 4; x86_pmu.lbr_tos = 0x01c9; @@ -199,7 +205,7 @@ static void intel_pmu_lbr_init_core(void) x86_pmu.lbr_to = 0x60; } -static void intel_pmu_lbr_init_nhm(void) +void intel_pmu_lbr_init_nhm(void) { x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = 0x01c9; @@ -207,12 +213,10 @@ static void intel_pmu_lbr_init_nhm(void) x86_pmu.lbr_to = 0x6c0; } -static void intel_pmu_lbr_init_atom(void) +void intel_pmu_lbr_init_atom(void) { x86_pmu.lbr_nr = 8; x86_pmu.lbr_tos = 0x01c9; x86_pmu.lbr_from = 0x40; x86_pmu.lbr_to = 0x60; } - -#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 7809d2bcb20..492bf1358a7 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -7,9 +7,13 @@ * For licencing details see kernel-base/COPYING */ -#ifdef CONFIG_CPU_SUP_INTEL +#include <linux/perf_event.h> #include <asm/perf_event_p4.h> +#include <asm/hardirq.h> +#include <asm/apic.h> + +#include "perf_event.h" #define P4_CNTR_LIMIT 3 /* @@ -1303,7 +1307,7 @@ static __initconst const struct x86_pmu p4_pmu = { .perfctr_second_write = 1, }; -static __init int p4_pmu_init(void) +__init int p4_pmu_init(void) { unsigned int low, high; @@ -1326,5 +1330,3 @@ static __init int p4_pmu_init(void) return 0; } - -#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 20c097e3386..c7181befecd 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -1,4 +1,7 @@ -#ifdef CONFIG_CPU_SUP_INTEL +#include <linux/perf_event.h> +#include <linux/types.h> + +#include "perf_event.h" /* * Not sure about some of these @@ -114,7 +117,7 @@ static 
__initconst const struct x86_pmu p6_pmu = { .event_constraints = p6_event_constraints, }; -static __init int p6_pmu_init(void) +__init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -138,5 +141,3 @@ static __init int p6_pmu_init(void) return 0; } - -#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 764c7c2b181..13ad89971d4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -32,15 +32,12 @@ int in_crash_kexec; #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static void kdump_nmi_callback(int cpu, struct die_args *args) +static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { - struct pt_regs *regs; #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; #endif - regs = args->regs; - #ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6419bb05ecd..faf8d5e74b0 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -331,10 +331,15 @@ ENDPROC(native_usergs_sysret64) 1: incl PER_CPU_VAR(irq_count) jne 2f mov PER_CPU_VAR(irq_stack_ptr),%rsp - EMPTY_FRAME 0 + CFI_DEF_CFA_REGISTER rsi 2: /* Store previous stack value */ pushq %rsi + CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ + 0x77 /* DW_OP_breg7 */, 0, \ + 0x06 /* DW_OP_deref */, \ + 0x08 /* DW_OP_const1u */, SS+8-RBP, \ + 0x22 /* DW_OP_plus */ /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF .endm @@ -788,7 +793,6 @@ END(interrupt) subq $ORIG_RAX-RBP, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP SAVE_ARGS_IRQ - PARTIAL_FRAME 0 call \func .endm @@ -813,10 +817,10 @@ ret_from_intr: /* Restore saved previous stack */ popq %rsi - leaq 16(%rsi), %rsp - + CFI_DEF_CFA_REGISTER rsi + leaq ARGOFFSET-RBP(%rsi), %rsp CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET -16 + CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET exit_intr: GET_THREAD_INFO(%rcx) diff --git 
a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 3fee346ef54..cacdd46d184 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -42,7 +42,7 @@ void arch_jump_label_transform(struct jump_entry *entry, put_online_cpus(); } -void arch_jump_label_text_poke_early(jump_label_t addr) +void __init_or_module arch_jump_label_text_poke_early(jump_label_t addr) { text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00354d4919a..faba5771aca 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) static int was_in_debug_nmi[NR_CPUS]; -static int __kgdb_notify(struct die_args *args, unsigned long cmd) +static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - struct pt_regs *regs = args->regs; - switch (cmd) { - case DIE_NMI: + case NMI_LOCAL: if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ kgdb_nmicallback(raw_smp_processor_id(), regs); was_in_debug_nmi[raw_smp_processor_id()] = 1; touch_nmi_watchdog(); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + break; - case DIE_NMIUNKNOWN: + case NMI_UNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + break; + default: + /* do nothing */ + break; + } + return NMI_DONE; +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + switch (cmd) { case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) @@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) static struct notifier_block kgdb_notifier = { .notifier_call = kgdb_notify, - - /* - * Lowest-prio notifier priority, we want to be notified last: - */ - .priority = 
NMI_LOCAL_LOW_PRIOR, }; /** @@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - return register_die_notifier(&kgdb_notifier); + int retval; + + retval = register_die_notifier(&kgdb_notifier); + if (retval) + goto out; + + retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, + 0, "kgdb"); + if (retval) + goto out1; + + retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, + 0, "kgdb"); + + if (retval) + goto out2; + + return retval; + +out2: + unregister_nmi_handler(NMI_LOCAL, "kgdb"); +out1: + unregister_die_notifier(&kgdb_notifier); +out: + return retval; } static void kgdb_hw_overflow_handler(struct perf_event *event, @@ -673,6 +701,8 @@ void kgdb_arch_exit(void) breakinfo[i].pev = NULL; } } + unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); + unregister_nmi_handler(NMI_LOCAL, "kgdb"); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 794bc95134c..7da647d8b64 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -75,10 +75,11 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * Undefined/reserved opcodes, conditional jump, Opcode Extension * Groups, and some special opcodes can not boost. - * This is non-const to keep gcc from statically optimizing it out, as - * variable_test_bit makes gcc think only *(unsigned long*) is used. + * This is non-const and volatile to keep gcc from statically + * optimizing it out, as variable_test_bit makes gcc think only + * *(unsigned long*) is used. 
*/ -static u32 twobyte_is_boostable[256 / 32] = { +static volatile u32 twobyte_is_boostable[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c new file mode 100644 index 00000000000..7ec5bd140b8 --- /dev/null +++ b/arch/x86/kernel/nmi.c @@ -0,0 +1,433 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + * Copyright (C) 2011 Don Zickus Red Hat, Inc. + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ + +/* + * Handle hardware traps and faults. + */ +#include <linux/spinlock.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/nmi.h> +#include <linux/delay.h> +#include <linux/hardirq.h> +#include <linux/slab.h> + +#include <linux/mca.h> + +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + +#include <linux/atomic.h> +#include <asm/traps.h> +#include <asm/mach_traps.h> +#include <asm/nmi.h> + +#define NMI_MAX_NAMELEN 16 +struct nmiaction { + struct list_head list; + nmi_handler_t handler; + unsigned int flags; + char *name; +}; + +struct nmi_desc { + spinlock_t lock; + struct list_head head; +}; + +static struct nmi_desc nmi_desc[NMI_MAX] = +{ + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock), + .head = LIST_HEAD_INIT(nmi_desc[0].head), + }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), + .head = LIST_HEAD_INIT(nmi_desc[1].head), + }, + +}; + +struct nmi_stats { + unsigned int normal; + unsigned int unknown; + unsigned int external; + unsigned int swallow; +}; + +static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); + +static int ignore_nmis; + +int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. 
+ */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); + +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + +#define nmi_to_desc(type) (&nmi_desc[type]) + +static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +{ + struct nmi_desc *desc = nmi_to_desc(type); + struct nmiaction *a; + int handled=0; + + rcu_read_lock(); + + /* + * NMIs are edge-triggered, which means if you have enough + * of them concurrently, you can lose some because only one + * can be latched at any given time. Walk the whole list + * to handle those situations. + */ + list_for_each_entry_rcu(a, &desc->head, list) + handled += a->handler(type, regs); + + rcu_read_unlock(); + + /* return total number of NMI events handled */ + return handled; +} + +static int __setup_nmi(unsigned int type, struct nmiaction *action) +{ + struct nmi_desc *desc = nmi_to_desc(type); + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + + /* + * most handlers of type NMI_UNKNOWN never return because + * they just assume the NMI is theirs. 
Just a sanity check + * to manage expectations + */ + WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); + + /* + * some handlers need to be executed first otherwise a fake + * event confuses some handlers (kdump uses this flag) + */ + if (action->flags & NMI_FLAG_FIRST) + list_add_rcu(&action->list, &desc->head); + else + list_add_tail_rcu(&action->list, &desc->head); + + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} + +static struct nmiaction *__free_nmi(unsigned int type, const char *name) +{ + struct nmi_desc *desc = nmi_to_desc(type); + struct nmiaction *n; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + + list_for_each_entry_rcu(n, &desc->head, list) { + /* + * the name passed in to describe the nmi handler + * is used as the lookup key + */ + if (!strcmp(n->name, name)) { + WARN(in_nmi(), + "Trying to free NMI (%s) from NMI context!\n", n->name); + list_del_rcu(&n->list); + break; + } + } + + spin_unlock_irqrestore(&desc->lock, flags); + synchronize_rcu(); + return (n); +} + +int register_nmi_handler(unsigned int type, nmi_handler_t handler, + unsigned long nmiflags, const char *devname) +{ + struct nmiaction *action; + int retval = -ENOMEM; + + if (!handler) + return -EINVAL; + + action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); + if (!action) + goto fail_action; + + action->handler = handler; + action->flags = nmiflags; + action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); + if (!action->name) + goto fail_action_name; + + retval = __setup_nmi(type, action); + + if (retval) + goto fail_setup_nmi; + + return retval; + +fail_setup_nmi: + kfree(action->name); +fail_action_name: + kfree(action); +fail_action: + + return retval; +} +EXPORT_SYMBOL_GPL(register_nmi_handler); + +void unregister_nmi_handler(unsigned int type, const char *name) +{ + struct nmiaction *a; + + a = __free_nmi(type, name); + if (a) { + kfree(a->name); + kfree(a); + } +} + +EXPORT_SYMBOL_GPL(unregister_nmi_handler); + +static 
notrace __kprobes void +pci_serr_error(unsigned char reason, struct pt_regs *regs) +{ + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + /* + * On some machines, PCI SERR line is used to report memory + * errors. EDAC makes use of it. + */ +#if defined(CONFIG_EDAC) + if (edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); + + /* Clear and disable the PCI SERR error line. */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +io_check_error(unsigned char reason, struct pt_regs *regs) +{ + unsigned long i; + + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + show_registers(regs); + + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); + + i = 20000; + while (--i) { + touch_nmi_watchdog(); + udelay(100); + } + + reason &= ~NMI_REASON_CLEAR_IOCHK; + outb(reason, NMI_REASON_PORT); +} + +static notrace __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) +{ + int handled; + + /* + * Use 'false' as back-to-back NMIs are dealt with one level up. 
+ * Of course this makes having multiple 'unknown' handlers useless + * as only the first one is ever run (unless it can actually determine + * if it caused the NMI) + */ + handled = nmi_handle(NMI_UNKNOWN, regs, false); + if (handled) { + __this_cpu_add(nmi_stats.unknown, handled); + return; + } + + __this_cpu_add(nmi_stats.unknown, 1); + +#ifdef CONFIG_MCA + /* + * Might actually be able to figure out what the guilty party + * is: + */ + if (MCA_bus) { + mca_handle_nmi(); + return; + } +#endif + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); + + pr_emerg("Do you have a strange power saving mode enabled?\n"); + if (unknown_nmi_panic || panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + pr_emerg("Dazed and confused, but trying to continue\n"); +} + +static DEFINE_PER_CPU(bool, swallow_nmi); +static DEFINE_PER_CPU(unsigned long, last_nmi_rip); + +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) +{ + unsigned char reason = 0; + int handled; + bool b2b = false; + + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + + /* + * Back-to-back NMIs are interesting because they can either + * be two NMIs or more than two NMIs (anything over two is dropped + * due to NMI being edge-triggered). If this is the second half + * of the back-to-back NMI, assume we dropped things and process + * more handlers. Otherwise reset the 'swallow' NMI behaviour + */ + if (regs->ip == __this_cpu_read(last_nmi_rip)) + b2b = true; + else + __this_cpu_write(swallow_nmi, false); + + __this_cpu_write(last_nmi_rip, regs->ip); + + handled = nmi_handle(NMI_LOCAL, regs, b2b); + __this_cpu_add(nmi_stats.normal, handled); + if (handled) { + /* + * There are cases when a NMI handler handles multiple + * events in the current NMI. One of these events may + * be queued for in the next NMI. 
Because the event is + * already handled, the next NMI will result in an unknown + * NMI. Instead let's flag this for a potential NMI to + * swallow. + */ + if (handled > 1) + __this_cpu_write(swallow_nmi, true); + return; + } + + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); + + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); +#endif + __this_cpu_add(nmi_stats.external, 1); + raw_spin_unlock(&nmi_reason_lock); + return; + } + raw_spin_unlock(&nmi_reason_lock); + + /* + * Only one NMI can be latched at a time. To handle + * this we may process multiple nmi handlers at once to + * cover the case where an NMI is dropped. The downside + * to this approach is we may process an NMI prematurely, + * while its real NMI is sitting latched. This will cause + * an unknown NMI on the next run of the NMI processing. + * + * We tried to flag that condition above, by setting the + * swallow_nmi flag when we process more than one event. + * This condition is also only present on the second half + * of a back-to-back NMI, so we flag that condition too. + * + * If both are true, we assume we already processed this + * NMI previously and we swallow it. Otherwise we reset + * the logic. + * + * There are scenarios where we may accidentally swallow + * a 'real' unknown NMI. For example, while processing + * a perf NMI another perf NMI comes in along with a + * 'real' unknown NMI. These two NMIs get combined into + * one (as described above). When the next NMI gets + * processed, it will be flagged by perf as handled, but + * no one will know that there was a 'real' unknown NMI sent + * also. As a result it gets swallowed. 
Or if the first + * perf NMI returns two events handled then the second + * NMI will get eaten by the logic below, again losing a + * 'real' unknown NMI. But this is the best we can do + * for now. + */ + if (b2b && __this_cpu_read(swallow_nmi)) + __this_cpu_add(nmi_stats.swallow, 1); + else + unknown_nmi_error(reason, regs); +} + +dotraplinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) +{ + nmi_enter(); + + inc_irq_stat(__nmi_count); + + if (!ignore_nmis) + default_do_nmi(regs); + + nmi_exit(); +} + +void stop_nmi(void) +{ + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; +} + +/* reset the back-to-back NMI logic */ +void local_touch_nmi(void) +{ + __this_cpu_write(last_nmi_rip, 0); +} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e3b019c43..b9b3b1a5164 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -49,7 +49,7 @@ void free_thread_xstate(struct task_struct *tsk) void free_thread_info(struct thread_info *ti) { free_thread_xstate(ti->task); - free_pages((unsigned long)ti, get_order(THREAD_SIZE)); + free_pages((unsigned long)ti, THREAD_ORDER); } void arch_task_cache_init(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2196c703c5e..795b79f984c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,6 +57,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> +#include <asm/nmi.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -107,6 +108,7 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); + local_touch_nmi(); local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f693e44e1bf..3bd7e6eebf3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include 
<asm/debugreg.h> +#include <asm/nmi.h> asmlinkage extern void ret_from_fork(void); @@ -133,6 +134,7 @@ void cpu_idle(void) * from here on, until they go to idle. * Otherwise, idle callbacks can misfire. */ + local_touch_nmi(); local_irq_disable(); enter_idle(); /* Don't trace irqs off for idle */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9242436e993..e334be1182b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -464,7 +464,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct die_args *args) +static void vmxoff_nmi(int cpu, struct pt_regs *regs) { cpu_emergency_vmxoff(); } @@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) +static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) { int cpu; - if (val != DIE_NMI) - return NOTIFY_OK; - cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. @@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self, * an NMI if system was initially booted with nmi_watchdog parameter. 
*/ if (cpu == crashing_cpu) - return NOTIFY_STOP; + return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, (struct die_args *)data); + shootdown_callback(cpu, regs); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self, for (;;) cpu_relax(); - return 1; + return NMI_HANDLED; } static void smp_send_nmi_allbutself(void) @@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void) apic->send_IPI_allbutself(NMI_VECTOR); } -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, - /* we want to be the first one called */ - .priority = NMI_LOCAL_HIGH_PRIOR+1, -}; - /* Halt all other CPUs, calling the specified function on each of them * * This function can be used to halt all other CPUs on crash @@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - if (register_die_notifier(&crash_nmi_nb)) + if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, + NMI_FLAG_FIRST, "crash")) return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6913369c234..a8e3eb83466 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -81,15 +81,6 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; DECLARE_BITMAP(used_vectors, NR_VECTORS); EXPORT_SYMBOL_GPL(used_vectors); -static int ignore_nmis; - -int unknown_nmi_panic; -/* - * Prevent NMI reason port (0x61) being accessed simultaneously, can - * only be used in NMI handler. 
- */ -static DEFINE_RAW_SPINLOCK(nmi_reason_lock); - static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -307,152 +298,6 @@ gp_in_kernel: die("general protection fault", regs, error_code); } -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static notrace __kprobes void -pci_serr_error(unsigned char reason, struct pt_regs *regs) -{ - pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - /* - * On some machines, PCI SERR line is used to report memory - * errors. EDAC makes use of it. - */ -#if defined(CONFIG_EDAC) - if (edac_handler_set()) { - edac_atomic_assert_error(); - return; - } -#endif - - if (panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); - - /* Clear and disable the PCI SERR error line. */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -io_check_error(unsigned char reason, struct pt_regs *regs) -{ - unsigned long i; - - pr_emerg( - "NMI: IOCK error (debug interrupt?) 
for reason %02x on CPU %d.\n", - reason, smp_processor_id()); - show_registers(regs); - - if (panic_on_io_nmi) - panic("NMI IOCK error: Not continuing"); - - /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); - - i = 20000; - while (--i) { - touch_nmi_watchdog(); - udelay(100); - } - - reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); -} - -static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs *regs) -{ - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == - NOTIFY_STOP) - return; -#ifdef CONFIG_MCA - /* - * Might actually be able to figure out what the guilty party - * is: - */ - if (MCA_bus) { - mca_handle_nmi(); - return; - } -#endif - pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - - pr_emerg("Do you have a strange power saving mode enabled?\n"); - if (unknown_nmi_panic || panic_on_unrecovered_nmi) - panic("NMI: Not continuing"); - - pr_emerg("Dazed and confused, but trying to continue\n"); -} - -static notrace __kprobes void default_do_nmi(struct pt_regs *regs) -{ - unsigned char reason = 0; - - /* - * CPU-specific NMI must be processed before non-CPU-specific - * NMI, otherwise we may lose it, because the CPU-specific - * NMI can not be detected/processed on other CPUs. 
- */ - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - raw_spin_lock(&nmi_reason_lock); - reason = get_nmi_reason(); - - if (reason & NMI_REASON_MASK) { - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); -#endif - raw_spin_unlock(&nmi_reason_lock); - return; - } - raw_spin_unlock(&nmi_reason_lock); - - unknown_nmi_error(reason, regs); -} - -dotraplinkage notrace __kprobes void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_enter(); - - inc_irq_stat(__nmi_count); - - if (!ignore_nmis) - default_do_nmi(regs); - - nmi_exit(); -} - -void stop_nmi(void) -{ - ignore_nmis++; -} - -void restart_nmi(void) -{ - ignore_nmis--; -} - /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 9f33b984d0e..374562ed670 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -22,14 +22,23 @@ #include <asm/inat.h> #include <asm/insn.h> -#define get_next(t, insn) \ - ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) +/* Verify next sizeof(t) bytes can be on the same instruction */ +#define validate_next(t, insn, n) \ + ((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= MAX_INSN_SIZE) + +#define __get_next(t, insn) \ + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define __peek_nbyte_next(t, insn, n) \ + ({ t r = *(t*)((insn)->next_byte + n); r; }) -#define peek_next(t, insn) \ - ({t r; r = *(t*)insn->next_byte; r; }) +#define get_next(t, insn) \ + ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) #define peek_nbyte_next(t, insn, n) \ - ({t r; r = *(t*)((insn)->next_byte + 
n); r; }) + ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) + +#define peek_next(t, insn) peek_nbyte_next(t, insn, 0) /** * insn_init() - initialize struct insn @@ -158,6 +167,8 @@ vex_end: insn->vex_prefix.got = 1; prefixes->got = 1; + +err_out: return; } @@ -208,6 +219,9 @@ void insn_get_opcode(struct insn *insn) insn->attr = 0; /* This instruction is bad */ end: opcode->got = 1; + +err_out: + return; } /** @@ -241,6 +255,9 @@ void insn_get_modrm(struct insn *insn) if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; modrm->got = 1; + +err_out: + return; } @@ -290,6 +307,9 @@ void insn_get_sib(struct insn *insn) } } insn->sib.got = 1; + +err_out: + return; } @@ -351,6 +371,9 @@ void insn_get_displacement(struct insn *insn) } out: insn->displacement.got = 1; + +err_out: + return; } /* Decode moffset16/32/64 */ @@ -373,6 +396,9 @@ static void __get_moffset(struct insn *insn) break; } insn->moffset1.got = insn->moffset2.got = 1; + +err_out: + return; } /* Decode imm v32(Iz) */ @@ -389,6 +415,9 @@ static void __get_immv32(struct insn *insn) insn->immediate.nbytes = 4; break; } + +err_out: + return; } /* Decode imm v64(Iv/Ov) */ @@ -411,6 +440,9 @@ static void __get_immv(struct insn *insn) break; } insn->immediate1.got = insn->immediate2.got = 1; + +err_out: + return; } /* Decode ptr16:16/32(Ap) */ @@ -432,6 +464,9 @@ static void __get_immptr(struct insn *insn) insn->immediate2.value = get_next(unsigned short, insn); insn->immediate2.nbytes = 2; insn->immediate1.got = insn->immediate2.got = 1; + +err_out: + return; } /** @@ -496,6 +531,9 @@ void insn_get_immediate(struct insn *insn) } done: insn->immediate.got = 1; + +err_out: + return; } /** diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0d17c8c50ac..9c7378df740 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -420,12 +420,14 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +#ifdef 
CONFIG_CPU_SUP_AMD static const char errata93_warning[] = KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" "******* Working around it, but it may cause SEGVs or burn power.\n" "******* Please consider a BIOS update.\n" "******* Disabling USB legacy in the BIOS may also help.\n"; +#endif /* * No vm86 mode in 64-bit mode: @@ -505,7 +507,11 @@ bad: */ static int is_errata93(struct pt_regs *regs, unsigned long address) { -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD) + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD + || boot_cpu_data.x86 != 0xf) + return 0; + if (address != regs->ip) return 0; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 68894fdc034..75f9528e037 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -61,26 +61,15 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, } -static int profile_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - if (ctr_running) - model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); - else if (!nmi_enabled) - break; - else - model->stop(&__get_cpu_var(cpu_msrs)); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + if (ctr_running) + model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); + else if (!nmi_enabled) + return NMI_DONE; + else + model->stop(&__get_cpu_var(cpu_msrs)); + return NMI_HANDLED; } static void nmi_cpu_save_registers(struct op_msrs *msrs) @@ -355,20 +344,14 @@ static void nmi_cpu_setup(void *dummy) int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); nmi_cpu_save_registers(msrs); - spin_lock(&oprofilefs_lock); + raw_spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); nmi_cpu_setup_mux(cpu, msrs); - 
spin_unlock(&oprofilefs_lock); + raw_spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); } -static struct notifier_block profile_exceptions_nb = { - .notifier_call = profile_exceptions_notify, - .next = NULL, - .priority = NMI_LOCAL_LOW_PRIOR, -}; - static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -402,8 +385,6 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); - if (model->cpu_down) - model->cpu_down(); } static void nmi_cpu_up(void *dummy) @@ -508,7 +489,8 @@ static int nmi_setup(void) ctr_running = 0; /* make variables visible to the nmi handler: */ smp_mb(); - err = register_die_notifier(&profile_exceptions_nb); + err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, + 0, "oprofile"); if (err) goto fail; @@ -538,7 +520,7 @@ static void nmi_shutdown(void) put_online_cpus(); /* make variables visible to the nmi handler: */ smp_mb(); - unregister_die_notifier(&profile_exceptions_nb); + unregister_nmi_handler(NMI_LOCAL, "oprofile"); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index 720bf5a53c5..7f8052cd662 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -18,32 +18,16 @@ #include <asm/apic.h> #include <asm/ptrace.h> -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + oprofile_add_sample(regs, 0); + return NMI_HANDLED; } -static 
struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = NMI_LOW_PRIOR, -}; - static int timer_start(void) { - if (register_die_notifier(&profile_timer_exceptions_nb)) + if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, + 0, "oprofile-timer")) return 1; return 0; } @@ -51,7 +35,7 @@ static int timer_start(void) static void timer_stop(void) { - unregister_die_notifier(&profile_timer_exceptions_nb); + unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); synchronize_sched(); /* Allow already-started NMIs to complete. */ } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 9cbb710dc94..303f0863782 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -29,8 +29,6 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_COUNTERS_F15H 6 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX #define NUM_VIRT_COUNTERS 32 #else @@ -70,62 +68,12 @@ static struct ibs_config ibs_config; static struct ibs_state ibs_state; /* - * IBS cpuid feature detection - */ - -#define IBS_CPUID_FEATURES 0x8000001b - -/* - * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but - * bit 0 is used to indicate the existence of IBS. 
- */ -#define IBS_CAPS_AVAIL (1U<<0) -#define IBS_CAPS_FETCHSAM (1U<<1) -#define IBS_CAPS_OPSAM (1U<<2) -#define IBS_CAPS_RDWROPCNT (1U<<3) -#define IBS_CAPS_OPCNT (1U<<4) -#define IBS_CAPS_BRNTRGT (1U<<5) -#define IBS_CAPS_OPCNTEXT (1U<<6) - -#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ - | IBS_CAPS_FETCHSAM \ - | IBS_CAPS_OPSAM) - -/* - * IBS APIC setup - */ -#define IBSCTL 0x1cc -#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) -#define IBSCTL_LVT_OFFSET_MASK 0x0F - -/* * IBS randomization macros */ #define IBS_RANDOM_BITS 12 #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) #define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) -static u32 get_ibs_caps(void) -{ - u32 ibs_caps; - unsigned int max_level; - - if (!boot_cpu_has(X86_FEATURE_IBS)) - return 0; - - /* check IBS cpuid feature flags */ - max_level = cpuid_eax(0x80000000); - if (max_level < IBS_CPUID_FEATURES) - return IBS_CAPS_DEFAULT; - - ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); - if (!(ibs_caps & IBS_CAPS_AVAIL)) - /* cpuid flags not valid */ - return IBS_CAPS_DEFAULT; - - return ibs_caps; -} - /* * 16-bit Linear Feedback Shift Register (LFSR) * @@ -316,81 +264,6 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -static inline int get_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); -} - -static inline int put_eilvt(int offset) -{ - return !setup_APIC_eilvt(offset, 0, 0, 1); -} - -static inline int ibs_eilvt_valid(void) -{ - int offset; - u64 val; - int valid = 0; - - preempt_disable(); - - rdmsrl(MSR_AMD64_IBSCTL, val); - offset = val & IBSCTL_LVT_OFFSET_MASK; - - if (!(val & IBSCTL_LVT_OFFSET_VALID)) { - pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - if (!get_eilvt(offset)) { - pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", - smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - goto out; - } - - 
valid = 1; -out: - preempt_enable(); - - return valid; -} - -static inline int get_ibs_offset(void) -{ - u64 val; - - rdmsrl(MSR_AMD64_IBSCTL, val); - if (!(val & IBSCTL_LVT_OFFSET_VALID)) - return -EINVAL; - - return val & IBSCTL_LVT_OFFSET_MASK; -} - -static void setup_APIC_ibs(void) -{ - int offset; - - offset = get_ibs_offset(); - if (offset < 0) - goto failed; - - if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) - return; -failed: - pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n", - smp_processor_id()); -} - -static void clear_APIC_ibs(void) -{ - int offset; - - offset = get_ibs_offset(); - if (offset >= 0) - setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); -} - #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, @@ -439,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) goto fail; } /* both registers must be reserved */ - if (num_counters == NUM_COUNTERS_F15H) { + if (num_counters == AMD64_NUM_COUNTERS_F15H) { msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); } else { @@ -504,15 +377,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, val |= op_x86_get_ctrl(model, &counter_config[virt]); wrmsrl(msrs->controls[i].addr, val); } - - if (ibs_caps) - setup_APIC_ibs(); -} - -static void op_amd_cpu_shutdown(void) -{ - if (ibs_caps) - clear_APIC_ibs(); } static int op_amd_check_ctrs(struct pt_regs * const regs, @@ -575,86 +439,6 @@ static void op_amd_stop(struct op_msrs const * const msrs) op_amd_stop_ibs(); } -static int setup_ibs_ctl(int ibs_eilvt_off) -{ - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVT_OFFSET_VALID); - 
pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { - pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); - return -EINVAL; - } - } while (1); - - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); - return -ENODEV; - } - - return 0; -} - -/* - * This runs only on the current cpu. We try to find an LVT offset and - * setup the local APIC. For this we must disable preemption. On - * success we initialize all nodes with this offset. This updates then - * the offset in the IBS_CTL per-node msr. The per-core APIC setup of - * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_- - * amd_cpu_shutdown() using the new offset. - */ -static int force_ibs_eilvt_setup(void) -{ - int offset; - int ret; - - preempt_disable(); - /* find the next free available EILVT entry, skip offset 0 */ - for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { - if (get_eilvt(offset)) - break; - } - preempt_enable(); - - if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; - } - - ret = setup_ibs_ctl(offset); - if (ret) - goto out; - - if (!ibs_eilvt_valid()) { - ret = -EFAULT; - goto out; - } - - pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); - pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); - - return 0; -out: - preempt_disable(); - put_eilvt(offset); - preempt_enable(); - return ret; -} - /* * check and reserve APIC extended interrupt LVT offset for IBS if * available @@ -667,17 +451,6 @@ static void init_ibs(void) if (!ibs_caps) return; - if (ibs_eilvt_valid()) - goto out; - - if (!force_ibs_eilvt_setup()) - goto out; - - /* Failed to setup ibs */ - ibs_caps = 0; - return; - -out: printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } @@ -741,9 +514,9 @@ static int op_amd_init(struct oprofile_operations *ops) ops->create_files = setup_ibs_files; if 
(boot_cpu_data.x86 == 0x15) { - num_counters = NUM_COUNTERS_F15H; + num_counters = AMD64_NUM_COUNTERS_F15H; } else { - num_counters = NUM_COUNTERS; + num_counters = AMD64_NUM_COUNTERS; } op_amd_spec.num_counters = num_counters; @@ -760,7 +533,6 @@ struct op_x86_model_spec op_amd_spec = { .init = op_amd_init, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, - .cpu_down = &op_amd_cpu_shutdown, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 94b745045e4..d90528ea541 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -28,7 +28,7 @@ static int counter_width = 32; #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) -static u64 *reset_value; +static u64 reset_value[OP_MAX_COUNTER]; static void ppro_shutdown(struct op_msrs const * const msrs) { @@ -40,10 +40,6 @@ static void ppro_shutdown(struct op_msrs const * const msrs) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } - if (reset_value) { - kfree(reset_value); - reset_value = NULL; - } } static int ppro_fill_in_addresses(struct op_msrs * const msrs) @@ -79,13 +75,6 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; - if (!reset_value) { - reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, - GFP_ATOMIC); - if (!reset_value) - return; - } - if (cpu_has_arch_perfmon) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); @@ -141,13 +130,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs, u64 val; int i; - /* - * This can happen if perf counters are in use when - * we steal the die notifier NMI. 
- */ - if (unlikely(!reset_value)) - goto out; - for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; @@ -158,7 +140,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs, wrmsrl(msrs->counters[i].addr, -reset_value[i]); } -out: /* Only P6 based Pentium M need to re-unmask the apic vector but it * doesn't hurt other P6 variant */ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); @@ -179,8 +160,6 @@ static void ppro_start(struct op_msrs const * const msrs) u64 val; int i; - if (!reset_value) - return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { rdmsrl(msrs->controls[i].addr, val); @@ -196,8 +175,6 @@ static void ppro_stop(struct op_msrs const * const msrs) u64 val; int i; - if (!reset_value) - return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; @@ -242,7 +219,7 @@ static void arch_perfmon_setup_counters(void) eax.split.bit_width = 40; } - num_counters = eax.split.num_counters; + num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER); op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 89017fa1fd6..71e8a67337e 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -43,7 +43,6 @@ struct op_x86_model_spec { int (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); - void (*cpu_down)(void); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index fe73276e026..e6379526675 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -14,6 +14,8 @@ #include <linux/init.h> #include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/scatterlist.h> 
#include <linux/sfi.h> #include <linux/intel_pmic_gpio.h> #include <linux/spi/spi.h> @@ -392,6 +394,7 @@ static void __init *max3111_platform_data(void *info) struct spi_board_info *spi_info = info; int intr = get_gpio_by_name("max3111_int"); + spi_info->mode = SPI_MODE_0; if (intr == -1) return NULL; spi_info->irq = intr + MRST_IRQ_OFFSET; diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 0234cd198c5..f932b30b47f 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -15,7 +15,6 @@ CONFIG_GENERIC_GPIO=y # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_NO_IOPORT=y CONFIG_HZ=100 -CONFIG_GENERIC_TIME=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_CONSTRUCTORS=y diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index 4891abbf16b..550e8ed5b5c 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -15,7 +15,6 @@ CONFIG_GENERIC_GPIO=y # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_NO_IOPORT=y CONFIG_HZ=100 -CONFIG_GENERIC_TIME=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # |