diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 30 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 297 | ||||
-rw-r--r-- | arch/x86/kernel/apm_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/init_task.c | 42 | ||||
-rw-r--r-- | arch/x86/kernel/irq_32.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/microcode_intel.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 83 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 34 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 83 |
13 files changed, 156 insertions, 462 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 532d2e090e6..56ebd1f9844 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds +extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a415b1f4436..7c439fe4941 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include <acpi/processor.h> -static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index edc24480469..3722179a49d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -35,6 +35,7 @@ #include <linux/smp.h> #include <linux/mm.h> +#include <asm/irq_remapping.h> #include <asm/perf_event.h> #include <asm/x86_init.h> #include <asm/pgalloc.h> @@ -1441,8 +1442,8 @@ void __init bsp_end_local_APIC_setup(void) * Now that local APIC setup is completed for BP, configure the fault * handling for interrupt remapping. */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); + if (irq_remapping_enabled) + irq_remap_enable_fault_handling(); } @@ -1517,7 +1518,7 @@ void enable_x2apic(void) int __init enable_IR(void) { #ifdef CONFIG_IRQ_REMAP - if (!intr_remapping_supported()) { + if (!irq_remapping_supported()) { pr_debug("intr-remapping not supported\n"); return -1; } @@ -1528,7 +1529,7 @@ int __init enable_IR(void) return -1; } - return enable_intr_remapping(); + return irq_remapping_enable(); #endif return -1; } @@ -1537,10 +1538,13 @@ void __init enable_IR_x2apic(void) { unsigned long flags; int ret, x2apic_enabled = 0; - int dmar_table_init_ret; + int hardware_init_ret; - dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret && !x2apic_supported()) + /* Make sure irq_remap_ops are initialized */ + setup_irq_remapping_ops(); + + hardware_init_ret = irq_remapping_prepare(); + if (hardware_init_ret && !x2apic_supported()) return; ret = save_ioapic_entries(); @@ -1556,7 +1560,7 @@ void __init enable_IR_x2apic(void) if (x2apic_preenabled && nox2apic) disable_x2apic(); - if (dmar_table_init_ret) + if (hardware_init_ret) ret = -1; else ret = enable_IR(); @@ -2176,8 +2180,8 @@ static int lapic_suspend(void) local_irq_save(flags); disable_local_APIC(); - if (intr_remapping_enabled) - disable_intr_remapping(); + if (irq_remapping_enabled) + irq_remapping_disable(); local_irq_restore(flags); return 0; @@ -2193,7 +2197,7 @@ static void lapic_resume(void) return; local_irq_save(flags); - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { /* * IO-APIC and PIC have their own resume routines. * We just mask them here to make sure the interrupt @@ -2245,8 +2249,8 @@ static void lapic_resume(void) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (intr_remapping_enabled) - reenable_intr_remapping(x2apic_mode); + if (irq_remapping_enabled) + irq_remapping_reenable(x2apic_mode); local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e88300d8e80..ef0648cd708 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -86,6 +86,22 @@ void __init set_io_apic_ops(const struct io_apic_ops *ops) io_apic_ops = *ops; } +#ifdef CONFIG_IRQ_REMAP +static void irq_remap_modify_chip_defaults(struct irq_chip *chip); +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return cfg->irq_2_iommu.iommu != NULL; +} +#else +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return false; +} +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} +#endif + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -1361,77 +1377,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi ? "fasteoi" : "edge"); } - -static int setup_ir_ioapic_entry(int irq, - struct IR_IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - int index; - struct irte irte; - int ioapic_id = mpc_ioapic_id(attr->ioapic); - struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); - - if (!iommu) { - pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); - return -ENODEV; - } - - index = alloc_irte(iommu, irq, 1); - if (index < 0) { - pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); - return -ENOMEM; - } - - prepare_irte(&irte, vector, destination); - - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, ioapic_id); - - modify_irte(irq, &irte); - - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " - "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " - "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " - "Avail:%X Vector:%02X Dest:%08X " - "SID:%04X SQ:%X SVT:%X)\n", - attr->ioapic, irte.present, irte.fpd, irte.dst_mode, - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, - irte.avail, irte.vector, irte.dest_id, - irte.sid, irte.sq, irte.svt); - - memset(entry, 0, sizeof(*entry)); - - entry->index2 = (index >> 15) & 0x1; - entry->zero = 0; - entry->format = 1; - entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - entry->vector = attr->ioapic_pin; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int vector, struct io_apic_irq_attr *attr) { - if (intr_remapping_enabled) - return setup_ir_ioapic_entry(irq, - (struct IR_IO_APIC_route_entry *)entry, - destination, vector, attr); + if (irq_remapping_enabled) + return setup_ioapic_remapped_entry(irq, entry, destination, + vector, attr); memset(entry, 0, sizeof(*entry)); @@ -1588,7 +1540,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, { struct IO_APIC_route_entry entry; - if (intr_remapping_enabled) + if (irq_remapping_enabled) return; memset(&entry, 0, sizeof(entry)); @@ -1674,7 +1626,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" " Pol Stat Indx2 Zero Vect:\n"); } else { @@ -1683,7 +1635,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) } for (i = 0; i <= reg_01.bits.entries; i++) { - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { struct IO_APIC_route_entry entry; struct IR_IO_APIC_route_entry *ir_entry; @@ -2050,7 +2002,7 @@ void disable_IO_APIC(void) * IOAPIC RTE as well as interrupt-remapping table entry). * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { + if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -2074,7 +2026,7 @@ void disable_IO_APIC(void) * Use virtual wire A mode when interrupt remapping is enabled. */ if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!intr_remapping_enabled && + disconnect_bsp_APIC(!irq_remapping_enabled && ioapic_i8259.pin != -1); } @@ -2390,71 +2342,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -#ifdef CONFIG_IRQ_REMAP - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For both level and edge triggered, irq migration is a simple atomic - * update(of vector and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we eliminate the io-apic RTE modification (with the - * updated vector information), by using a virtual vector (io-apic pin number). - * Real vector that is used for interrupting cpu will be coming from - * the interrupt-remapping table entry. - * - * As the migration is a simple atomic update of IRTE, the same mechanism - * is used to migrate MSI irq's in the presence of interrupt-remapping. - */ -static int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - if (get_irte(irq, &irte)) - return -EBUSY; - - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - cpumask_copy(data->affinity, mask); - return 0; -} - -#else -static inline int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - return 0; -} -#endif - asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -2699,7 +2586,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_eoi = ir_ack_apic_level; #ifdef CONFIG_SMP - chip->irq_set_affinity = ir_ioapic_set_affinity; + chip->irq_set_affinity = set_remapped_irq_affinity; #endif } #endif /* CONFIG_IRQ_REMAP */ @@ -2912,7 +2799,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (intr_remapping_enabled) + if (irq_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; @@ -2945,7 +2832,7 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (intr_remapping_enabled) + if (irq_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); @@ -3169,7 +3056,7 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); if (irq_remapped(cfg)) - free_irte(irq); + free_remapped_irq(irq); raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); @@ -3198,54 +3085,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); if (irq_remapped(cfg)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - prepare_irte(&irte, cfg->vector, dest); - - /* Set source-id of interrupt request */ - if (pdev) - set_msi_sid(&irte, pdev); - else - set_hpet_sid(&irte, hpet_id); - - modify_irte(irq, &irte); + compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); + return err; + } + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(dest); + else msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else { - if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } return err; } @@ -3288,33 +3155,6 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} - static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { struct irq_chip *chip = &msi_chip; @@ -3345,7 +3185,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; - struct intel_iommu *iommu = NULL; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) @@ -3359,7 +3198,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (irq == 0) return -1; irq_want = irq + 1; - if (!intr_remapping_enabled) + if (!irq_remapping_enabled) goto no_ir; if (!sub_handle) { @@ -3367,23 +3206,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) * allocate the consecutive block of IRTE's * for 'nvec' */ - index = msi_alloc_irte(dev, irq, nvec); + index = msi_alloc_remapped_irq(dev, irq, nvec); if (index < 0) { ret = index; goto error; } } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; + ret = msi_setup_remapped_irq(dev, irq, index, + sub_handle); + if (ret < 0) goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); } no_ir: ret = setup_msi_irq(dev, msidesc, irq); @@ -3501,15 +3333,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) struct msi_msg msg; int ret; - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_hpet_to_ir(id); - int index; - - if (!iommu) - return -1; - - index = alloc_irte(iommu, irq, 1); - if (index < 0) + if (irq_remapping_enabled) { + if (!setup_hpet_msi_remapped(irq, id)) return -1; } @@ -3888,8 +3713,8 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - if (intr_remapping_enabled) - ir_ioapic_set_affinity(idata, mask, false); + if (irq_remapping_enabled) + set_remapped_irq_affinity(idata, mask, false); else ioapic_set_affinity(idata, mask, false); } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 459e78cbf61..07b0c0db466 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -2401,7 +2401,7 @@ static void __exit apm_exit(void) * (pm_idle), Wait for all processors to update cached/local * copies of pm_idle before proceeding. */ - cpu_idle_wait(); + kick_all_cpus_sync(); } if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) && (apm_info.connection_version > 0x0100)) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c0276d5d9bd..297edb1b1fb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -945,9 +945,10 @@ struct mce_info { atomic_t inuse; struct task_struct *t; __u64 paddr; + int restartable; } mce_info[MCE_INFO_MAX]; -static void mce_save_info(__u64 addr) +static void mce_save_info(__u64 addr, int c) { struct mce_info *mi; @@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr) if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { mi->t = current; mi->paddr = addr; + mi->restartable = c; return; } } @@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { /* schedule action before return to userland */ - mce_save_info(m.addr); + mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); set_thread_flag(TIF_MCE_NOTIFY); } else if (kill_it) { force_sig(SIGBUS, current); @@ -1179,7 +1181,13 @@ void mce_notify_process(void) pr_err("Uncorrected hardware memory error in user-access at %llx", mi->paddr); - if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || + mi->restartable == 0) { pr_err("Memory error not recovered"); force_sig(SIGBUS, current); } diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c deleted file mode 100644 index 43e9ccf4494..00000000000 --- a/arch/x86/kernel/init_task.c +++ /dev/null @@ -1,42 +0,0 @@ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); -EXPORT_SYMBOL(init_task); - -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data..cacheline_aligned - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 58b7f27cb3e..344faf8d0d6 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -127,8 +127,8 @@ void __cpuinit irq_ctx_init(int cpu) return; irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); + THREADINFO_GFP, + THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; @@ -137,8 +137,8 @@ void __cpuinit irq_ctx_init(int cpu) per_cpu(hardirq_ctx, cpu) = irqctx; irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); + THREADINFO_GFP, + THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3ca42d0e43a..0327e2b3c40 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -147,12 +147,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - pr_err("CPU%d not a capable Intel processor\n", cpu_num); - return -1; - } - csig->sig = cpuid_eax(0x00000001); if ((c->x86_model >= 5) || (c->x86 > 6)) { @@ -463,6 +457,14 @@ static struct microcode_ops microcode_intel_ops = { struct microcode_ops * __init init_intel_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + pr_err("Intel CPU family 0x%x not supported\n", c->x86); + return NULL; + } + return µcode_intel_ops; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 47acaf31916..585be4bd71a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -31,14 +31,6 @@ #include <asm/nmi.h> #include <asm/x86_init.h> -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - struct nmi_desc { spinlock_t lock; struct list_head head; @@ -54,6 +46,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] = .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), .head = LIST_HEAD_INIT(nmi_desc[1].head), }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock), + .head = LIST_HEAD_INIT(nmi_desc[2].head), + }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock), + .head = LIST_HEAD_INIT(nmi_desc[3].head), + }, }; @@ -107,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, return handled; } -static int __setup_nmi(unsigned int type, struct nmiaction *action) +int __register_nmi_handler(unsigned int type, struct nmiaction *action) { struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags; + if (!action->handler) + return -EINVAL; + spin_lock_irqsave(&desc->lock, flags); /* @@ -120,6 +123,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) * to manage expectations */ WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); /* * some handlers need to be executed first otherwise a fake @@ -133,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) spin_unlock_irqrestore(&desc->lock, flags); return 0; } +EXPORT_SYMBOL(__register_nmi_handler); -static struct nmiaction *__free_nmi(unsigned int type, const char *name) +void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *n; @@ -157,61 +163,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name) spin_unlock_irqrestore(&desc->lock, flags); synchronize_rcu(); - return (n); } - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_SERR, regs, false)) + return; + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", reason, smp_processor_id()); @@ -241,6 +202,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_IO_CHECK, regs, false)) + return; + pr_emerg( "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", reason, smp_processor_id()); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 857adffb708..dc8ca8ea78c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -27,6 +27,15 @@ #include <asm/debugreg.h> #include <asm/nmi.h> +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data..cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; + #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -67,10 +76,9 @@ void free_thread_xstate(struct task_struct *tsk) fpu_free(&tsk->thread.fpu); } -void free_thread_info(struct thread_info *ti) +void arch_release_task_struct(struct task_struct *tsk) { - free_thread_xstate(ti->task); - free_pages((unsigned long)ti, THREAD_ORDER); + free_thread_xstate(tsk); } void arch_task_cache_init(void) @@ -516,26 +524,6 @@ void stop_this_cpu(void *dummy) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 685845cf16e..13b1990c7c5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - secure_computing(regs->orig_ax); + if (secure_computing(regs->orig_ax)) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1L; + goto out; + } if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; @@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs) regs->dx, regs->r10); #endif +out: return ret ?: regs->orig_ax; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6e1e406038c..3acaf51dfdd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -76,20 +76,8 @@ /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; -/* Store all idle threads, this can be reused instead of creating -* a new thread. Also avoids complicated thread destroy functionality -* for idle threads. -*/ #ifdef CONFIG_HOTPLUG_CPU /* - * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is - * removed after init for !CONFIG_HOTPLUG_CPU. - */ -static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); -#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) -#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) - -/* * We need this for trampoline_base protection from concurrent accesses when * off- and onlining cores wildly. */ @@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); void cpu_hotplug_driver_lock(void) { - mutex_lock(&x86_cpu_hotplug_driver_mutex); + mutex_lock(&x86_cpu_hotplug_driver_mutex); } void cpu_hotplug_driver_unlock(void) { - mutex_unlock(&x86_cpu_hotplug_driver_mutex); + mutex_unlock(&x86_cpu_hotplug_driver_mutex); } ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } -#else -static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif /* Number of siblings per CPU package */ @@ -618,22 +602,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) return (send_status | accept_status); } -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit do_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - /* reduce the number of lines printed when booting a large cpu count system */ static void __cpuinit announce_cpu(int cpu, int apicid) { @@ -660,58 +628,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid) * Returns zero if CPU booted OK, else error code from * ->wakeup_secondary_cpu. */ -static int __cpuinit do_boot_cpu(int apicid, int cpu) +static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { unsigned long boot_error = 0; unsigned long start_ip; int timeout; - struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), - }; - - INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); - c_idle.idle = get_idle_for_cpu(cpu); - - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. - */ - if (c_idle.idle) { - c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) - (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); - init_idle(c_idle.idle, cpu); - goto do_rest; - } + idle->thread.sp = (unsigned long) (((struct pt_regs *) + (THREAD_SIZE + task_stack_page(idle))) - 1); + per_cpu(current_task, cpu) = idle; - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - - if (IS_ERR(c_idle.idle)) { - printk("failed fork for CPU %d\n", cpu); - destroy_work_on_stack(&c_idle.work); - return PTR_ERR(c_idle.idle); - } - - set_idle_for_cpu(cpu, c_idle.idle); -do_rest: - per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(c_idle.idle) - + (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; - stack_start = c_idle.idle->thread.sp; + stack_start = idle->thread.sp; /* start_ip had better be page-aligned! */ start_ip = trampoline_address(); @@ -813,12 +754,10 @@ do_rest: */ smpboot_restore_warm_reset_vector(); } - - destroy_work_on_stack(&c_idle.work); return boot_error; } -int __cpuinit native_cpu_up(unsigned int cpu) +int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); unsigned long flags; @@ -851,7 +790,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - err = do_boot_cpu(apicid, cpu); + err = do_boot_cpu(apicid, cpu, tidle); if (err) { pr_debug("do_boot_cpu failed %d\n", err); return -EIO; |