diff options
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 112 | ||||
-rw-r--r-- | arch/x86/kernel/apic/es7000_32.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 375 | ||||
-rw-r--r-- | arch/x86/kernel/apic/ipi.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/apic/nmi.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/apic/numaq_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/apic/probe_32.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/apic/probe_64.c | 16 | ||||
-rw-r--r-- | arch/x86/kernel/apic/summit_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_phys.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 42 |
12 files changed, 318 insertions, 292 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8c7c042ecad..159740decc4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -49,6 +49,7 @@ #include <asm/mtrr.h> #include <asm/smp.h> #include <asm/mce.h> +#include <asm/kvm_para.h> unsigned int num_processors; @@ -140,7 +141,6 @@ int x2apic_mode; #ifdef CONFIG_X86_X2APIC /* x2apic enabled before OS handover */ static int x2apic_preenabled; -static int disable_x2apic; static __init int setup_nox2apic(char *str) { if (x2apic_enabled()) { @@ -149,7 +149,6 @@ static __init int setup_nox2apic(char *str) return 0; } - disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; } @@ -1363,52 +1362,80 @@ void enable_x2apic(void) } #endif /* CONFIG_X86_X2APIC */ -void __init enable_IR_x2apic(void) +int __init enable_IR(void) { #ifdef CONFIG_INTR_REMAP - int ret; - unsigned long flags; - struct IO_APIC_route_entry **ioapic_entries = NULL; - - ret = dmar_table_init(); - if (ret) { - pr_debug("dmar_table_init() failed with %d:\n", ret); - goto ir_failed; - } - if (!intr_remapping_supported()) { pr_debug("intr-remapping not supported\n"); - goto ir_failed; + return 0; } - if (!x2apic_preenabled && skip_ioapic_setup) { pr_info("Skipped enabling intr-remap because of skipping " "io-apic setup\n"); - return; + return 0; } + if (enable_intr_remapping(x2apic_supported())) + return 0; + + pr_info("Enabled Interrupt-remapping\n"); + + return 1; + +#endif + return 0; +} + +void __init enable_IR_x2apic(void) +{ + unsigned long flags; + struct IO_APIC_route_entry **ioapic_entries = NULL; + int ret, x2apic_enabled = 0; + int dmar_table_init_ret = 0; + +#ifdef CONFIG_INTR_REMAP + dmar_table_init_ret = dmar_table_init(); + if (dmar_table_init_ret) + pr_debug("dmar_table_init() failed with %d:\n", + dmar_table_init_ret); +#endif + ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { - pr_info("Allocate ioapic_entries failed: %d\n", ret); - goto end; + pr_err("Allocate ioapic_entries failed\n"); + goto out; } ret = save_IO_APIC_setup(ioapic_entries); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); - goto end; + goto out; } local_irq_save(flags); - mask_IO_APIC_setup(ioapic_entries); mask_8259A(); + mask_IO_APIC_setup(ioapic_entries); - ret = enable_intr_remapping(x2apic_supported()); - if (ret) - goto end_restore; + if (dmar_table_init_ret) + ret = 0; + else + ret = enable_IR(); - pr_info("Enabled Interrupt-remapping\n"); + if (!ret) { + /* IR is required if there is APIC ID > 255 even when running + * under KVM + */ + if (max_physical_apicid > 255 || !kvm_para_available()) + goto nox2apic; + /* + * without IR all CPUs can be addressed by IOAPIC/MSI + * only in physical mode + */ + x2apic_force_phys(); + } + + x2apic_enabled = 1; if (x2apic_supported() && !x2apic_mode) { x2apic_mode = 1; @@ -1416,41 +1443,25 @@ void __init enable_IR_x2apic(void) pr_info("Enabled x2apic\n"); } -end_restore: - if (ret) - /* - * IR enabling failed - */ +nox2apic: + if (!ret) /* IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - unmask_8259A(); local_irq_restore(flags); -end: +out: if (ioapic_entries) free_ioapic_entries(ioapic_entries); - if (!ret) + if (x2apic_enabled) return; -ir_failed: if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); + panic("x2apic: enabled by BIOS but kernel init failed."); else if (cpu_has_x2apic) - pr_info("Not enabling x2apic,Intr-remapping\n"); -#else - if (!cpu_has_x2apic) - return; - - if (x2apic_preenabled) - panic("x2apic enabled prior OS handover," - " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); -#endif - - return; + pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); } - #ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. @@ -1551,8 +1562,6 @@ no_apic: #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) { - unsigned long phys_addr; - /* * If no local APIC can be found then go out * : it means there is no mpatable and MADT @@ -1560,11 +1569,9 @@ void __init early_init_lapic_mapping(void) if (!smp_found_config) return; - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); + APIC_BASE, mp_lapic_addr); /* * Fetch the APIC ID of the BSP in case we have a @@ -1653,7 +1660,6 @@ int __init APIC_init_uniprocessor(void) APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { pr_err("BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } #endif diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 69328ac8de9..89174f847b4 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void) { /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) + (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) return 1; return 0; @@ -652,7 +652,8 @@ static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, return ret && es7000_apic_is_cluster(); } -struct apic apic_es7000_cluster = { +/* We've been warned by a false positive warning.Use __refdata to keep calm. */ +struct apic __refdata apic_es7000_cluster = { .name = "es7000", .probe = probe_es7000, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 29d0752d951..3c8f9e75d03 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,8 @@ #include <asm/apic.h> #define __apicdebuginit(type) static type __init +#define for_each_irq_pin(entry, head) \ + for (entry = head; entry; entry = entry->next) /* * Is the SiS APIC rmw bug present ? @@ -85,6 +87,9 @@ int nr_ioapic_registers[MAX_IO_APICS]; struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; +/* IO APIC gsi routing info */ +struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; + /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -116,15 +121,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -struct irq_pin_list; - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - struct irq_pin_list { int apic, pin; struct irq_pin_list *next; @@ -139,6 +135,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; @@ -414,13 +415,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; - if (!entry) - break; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ @@ -428,9 +426,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) spin_unlock_irqrestore(&ioapic_lock, flags); return true; } - if (!entry->next) - break; - entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -498,72 +493,68 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int +add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) { - struct irq_pin_list *entry; + struct irq_pin_list **last, *entry; - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(node); - if (!entry) { - printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", - apic, pin); - return; - } - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - return; - } - - while (entry->next) { - /* not again, please */ + /* don't allow duplicates */ + last = &cfg->irq_2_pin; + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == apic && entry->pin == pin) - return; - - entry = entry->next; + return 0; + last = &entry->next; } - entry->next = get_one_free_irq_2_pin(node); - entry = entry->next; + entry = get_one_free_irq_2_pin(node); + if (!entry) { + printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); + return -ENOMEM; + } entry->apic = apic; entry->pin = pin; + + *last = entry; + return 0; +} + +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +{ + if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) + panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); } /* * Reroute an IRQ to a different pin. */ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, - int oldapic, int oldpin, - int newapic, int newpin) + int oldapic, int oldpin, + int newapic, int newpin) { - struct irq_pin_list *entry = cfg->irq_2_pin; - int replaced = 0; + struct irq_pin_list *entry; - while (entry) { + for_each_irq_pin(entry, cfg->irq_2_pin) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - replaced = 1; /* every one is different, right? */ - break; + return; } - entry = entry->next; } - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq_node(cfg, node, newapic, newpin); + /* old apic/pin didn't exist, so just add new ones */ + add_pin_to_irq_node(cfg, node, newapic, newpin); } -static inline void io_apic_modify_irq(struct irq_cfg *cfg, - int mask_and, int mask_or, - void (*final)(struct irq_pin_list *entry)) +static void io_apic_modify_irq(struct irq_cfg *cfg, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) { int pin; struct irq_pin_list *entry; - for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin * 2); @@ -580,7 +571,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } -#ifdef CONFIG_X86_64 static void io_apic_sync(struct irq_pin_list *entry) { /* @@ -596,11 +586,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -#else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); -} static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { @@ -613,7 +598,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } -#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { @@ -1414,6 +1398,9 @@ int setup_ioapic_entry(int apic_id, int irq, irte.vector = vector; irte.dest_id = IRTE_DEST(destination); + /* Set source-id of interrupt request */ + set_ioapic_sid(&irte, apic_id); + modify_irte(irq, &irte); ir_entry->index2 = (index >> 15) & 0x1; @@ -1699,12 +1686,8 @@ __apicdebuginit(void) print_IO_APIC(void) if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } printk("\n"); } @@ -1713,25 +1696,19 @@ __apicdebuginit(void) print_IO_APIC(void) return; } -__apicdebuginit(void) print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_field(int base) { - unsigned int v; - int i, j; + int i; if (apic_verbosity == APIC_QUIET) return; - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1<<j)) - printk("1"); - else - printk("0"); - } - printk("\n"); - } + printk(KERN_DEBUG); + + for (i = 0; i < 8; i++) + printk(KERN_CONT "%08x", apic_read(base + i*0x10)); + + printk(KERN_CONT "\n"); } __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1742,7 +1719,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) if (apic_verbosity == APIC_QUIET) return; - printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", + printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); @@ -1783,11 +1760,11 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); printk(KERN_DEBUG "... APIC ISR field:\n"); - print_APIC_bitfield(APIC_ISR); + print_APIC_field(APIC_ISR); printk(KERN_DEBUG "... APIC TMR field:\n"); - print_APIC_bitfield(APIC_TMR); + print_APIC_field(APIC_TMR); printk(KERN_DEBUG "... APIC IRR field:\n"); - print_APIC_bitfield(APIC_IRR); + print_APIC_field(APIC_IRR); if (APIC_INTEGRATED(ver)) { /* !82489DX */ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -2004,7 +1981,9 @@ void disable_IO_APIC(void) /* * Use virtual wire A mode when interrupt remapping is enabled. */ - disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); + if (cpu_has_apic) + disconnect_bsp_APIC(!intr_remapping_enabled && + ioapic_i8259.pin != -1); } #ifdef CONFIG_X86_32 @@ -2212,7 +2191,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } -#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { @@ -2225,14 +2203,6 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - apic->send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -2270,13 +2240,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq struct irq_pin_list *entry; u8 vector = cfg->vector; - entry = cfg->irq_2_pin; - for (;;) { + for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; - if (!entry) - break; - apic = entry->apic; pin = entry->pin; /* @@ -2289,9 +2255,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; } } @@ -2516,11 +2479,8 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - -#ifdef CONFIG_X86_32 unsigned long v; int i; -#endif struct irq_cfg *cfg; int do_unmask_irq = 0; @@ -2533,31 +2493,28 @@ static void ack_apic_level(unsigned int irq) } #endif -#ifdef CONFIG_X86_32 /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ cfg = desc->chip_data; i = cfg->vector; - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); -#endif /* * We must acknowledge the irq before we move it or the acknowledge will @@ -2599,7 +2556,7 @@ static void ack_apic_level(unsigned int irq) unmask_IO_APIC_irq_desc(desc); } -#ifdef CONFIG_X86_32 + /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); @@ -2607,26 +2564,15 @@ static void ack_apic_level(unsigned int irq) __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } -#endif } #ifdef CONFIG_INTR_REMAP static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { - int apic, pin; struct irq_pin_list *entry; - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } + for_each_irq_pin(entry, cfg->irq_2_pin) + io_apic_eoi(entry->apic, entry->pin); } static void @@ -3242,8 +3188,7 @@ void destroy_irq(unsigned int irq) cfg = desc->chip_data; dynamic_irq_cleanup(irq); /* connect back irq_cfg */ - if (desc) - desc->chip_data = cfg; + desc->chip_data = cfg; free_irte(irq); spin_lock_irqsave(&vector_lock, flags); @@ -3288,6 +3233,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); + /* Set source-id of interrupt request */ + set_msi_sid(&irte, pdev); + modify_irte(irq, &irte); msg->address_hi = MSI_ADDR_BASE_HI; @@ -3791,6 +3739,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + return irq; } @@ -3907,7 +3858,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_node(cfg, node, ioapic, pin); + if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { + printk(KERN_INFO "can not add pin %d for irq %d\n", + pin, irq); + return 0; + } } setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); @@ -3936,11 +3891,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq, return __io_apic_set_pci_routing(dev, irq, irq_attr); } -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ +u8 __init io_apic_unique_id(u8 id) +{ +#ifdef CONFIG_X86_32 + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +#else + int i; + DECLARE_BITMAP(used, 256); -#ifdef CONFIG_ACPI + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +#endif +} #ifdef CONFIG_X86_32 int __init io_apic_get_unique_id(int ioapic, int apic_id) @@ -4049,8 +4021,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return 0; } -#endif /* CONFIG_ACPI */ - /* * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online @@ -4104,7 +4074,7 @@ void __init setup_ioapic_dest(void) static struct resource *ioapic_resources; -static struct resource * __init ioapic_setup_resources(void) +static struct resource * __init ioapic_setup_resources(int nr_ioapics) { unsigned long n; struct resource *res; @@ -4120,15 +4090,13 @@ static struct resource * __init ioapic_setup_resources(void) mem = alloc_bootmem(n); res = (void *)mem; - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; + mem += sizeof(struct resource) * nr_ioapics; - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; } ioapic_resources = res; @@ -4142,7 +4110,7 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - ioapic_res = ioapic_setup_resources(); + ioapic_res = ioapic_setup_resources(nr_ioapics); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].apicaddr; @@ -4171,36 +4139,99 @@ fake_ioapic_page: __fix_to_virt(idx), ioapic_phys); idx++; - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; } } -static int __init ioapic_insert_resources(void) +void __init ioapic_insert_resources(void) { int i; struct resource *r = ioapic_resources; if (!r) { - if (nr_ioapics > 0) { + if (nr_ioapics > 0) printk(KERN_ERR "IO APIC resources couldn't be allocated.\n"); - return -1; - } - return 0; + return; } for (i = 0; i < nr_ioapics; i++) { insert_resource(&iomem_resource, r); r++; } +} + +int mp_find_ioapic(int gsi) +{ + int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { + if ((gsi >= mp_gsi_routing[i].gsi_base) + && (gsi <= mp_gsi_routing[i].gsi_end)) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); + return -1; +} + +int mp_find_ioapic_pin(int ioapic, int gsi) +{ + if (WARN_ON(ioapic == -1)) + return -1; + if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) + return -1; + + return gsi - mp_gsi_routing[ioapic].gsi_base; +} +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " + "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); + return 1; + } + if (!address) { + printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } return 0; } -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); +void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) +{ + int idx = 0; + + if (bad_ioapic(address)) + return; + + idx = nr_ioapics; + + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + mp_ioapics[idx].apicid = io_apic_unique_id(id); + mp_ioapics[idx].apicver = io_apic_get_version(idx); + + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ + mp_gsi_routing[idx].gsi_base = gsi_base; + mp_gsi_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, + mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); + + nr_ioapics++; +} diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dbf5445727a..08385e090a6 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; + if (WARN_ONCE(!mask, "empty IPI mask")) + return; + local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __default_send_IPI_dest_field(mask, vector, apic->dest_logical); @@ -150,7 +153,7 @@ int safe_smp_processor_id(void) { int apicid, cpuid; - if (!boot_cpu_has(X86_FEATURE_APIC)) + if (!cpu_has_apic) return 0; apicid = hard_smp_processor_id(); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index f4227289caf..cb66a22d98a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,7 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_var_t backtrace_mask; +static cpumask_t backtrace_mask __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP @@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { + if (cpumask_test_cpu(cpu, &backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, backtrace_mask); + cpumask_clear_cpu(cpu, &backtrace_mask); + + rc = 1; } /* Could check oops_in_progress here too, but it's safer not to */ @@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) return 0; } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(backtrace_mask, cpu_online_mask); + cpumask_copy(&backtrace_mask, cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); + apic->send_IPI_all(NMI_VECTOR); + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(backtrace_mask)) + if (cpumask_empty(&backtrace_mask)) break; mdelay(1); } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 533e59c6fc8..ca96e68f0d2 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -493,7 +493,8 @@ static void numaq_setup_portio_remap(void) (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); } -struct apic apic_numaq = { +/* Use __refdata to keep false positive warning calm. */ +struct apic __refdata apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 440a8bccd91..0c0182cc947 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -20,23 +20,12 @@ #include <asm/apic.h> #include <asm/setup.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <asm/mpspec.h> -#include <asm/fixmap.h> -#include <asm/apicdef.h> -#include <linux/kernel.h> -#include <linux/string.h> #include <linux/smp.h> -#include <linux/init.h> #include <asm/ipi.h> -#include <linux/smp.h> -#include <linux/init.h> #include <linux/interrupt.h> #include <asm/acpi.h> #include <asm/e820.h> -#include <asm/setup.h> #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880f9b8..65edc180fc8 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -44,17 +44,22 @@ static struct apic *apic_probe[] __initdata = { NULL, }; +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic_mode && (apic != &apic_x2apic_phys && + if (x2apic_mode #ifdef CONFIG_X86_UV - apic != &apic_x2apic_uv_x && + && apic != &apic_x2apic_uv_x #endif - apic != &apic_x2apic_cluster)) { + ) { if (x2apic_phys) apic = &apic_x2apic_phys; else @@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void) printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } + if (is_vsmp_box()) { + /* need to update phys_pkg_id */ + apic->phys_pkg_id = apicid_phys_pkg_id; + } + /* * Now that apic routing model is selected, configure the * fault handling for intr remapping. diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 344eee4ac0a..eafdfbd1ea9 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -44,7 +44,6 @@ #include <asm/ipi.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/init.h> #include <linux/gfp.h> #include <linux/smp.h> diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8e4cbb255c3..a5371ec3677 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled(); } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } /* @@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a284359627e..a8989aadc99 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) @@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 096d19aea2f..601159374e8 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -46,7 +46,7 @@ static int early_get_nodeid(void) return node_id.s.node_id; } -static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { if (!strcmp(oem_table_id, "UVL")) @@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector) apic_write(APIC_SELF_IPI, vector); } -struct apic apic_x2apic_uv_x = { +struct apic __refdata apic_x2apic_uv_x = { .name = "UV large system", .probe = NULL, @@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 1, /* logical */ + .irq_dest_mode = 0, /* physical */ .target_cpus = uv_target_cpus, .disable_esr = 0, @@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) BUG(); } -static __init void map_low_mmrs(void) -{ - init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); - init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); -} - enum map_type {map_wb, map_uc}; static __init void map_high(char *id, unsigned long base, int shift, @@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode) map_high("GRU", gru.s.base, shift, max_pnode, map_wb); } -static __init void map_config_high(int max_pnode) -{ - union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; - int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; - - cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); - if (cfg.s.enable) - map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); -} - -static __init void map_mmr_high(int max_pnode) -{ - union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; - int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; - - mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); - if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); -} - static __init void map_mmioh_high(int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; @@ -566,8 +540,6 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; - map_low_mmrs(); - m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; @@ -591,6 +563,8 @@ void __init uv_system_init(void) bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kmalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) + uv_blade_info[blade].memory_nid = -1; get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); @@ -629,6 +603,9 @@ void __init uv_system_init(void) lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; + /* Any node on the blade, else will contain -1. */ + uv_blade_info[blade].memory_nid = nid; + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; uv_cpu_hub_info(cpu)->m_val = m_val; @@ -662,11 +639,10 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; + max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); - map_mmr_high(max_pnode); - map_config_high(max_pnode); map_mmioh_high(max_pnode); uv_cpu_init(); |