diff options
Diffstat (limited to 'arch/x86')
34 files changed, 765 insertions, 617 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 98876f55a2e..c940cb6f040 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -77,11 +77,11 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP - select HAVE_BPF_JIT if (X86_64 && NET) + select HAVE_BPF_JIT if X86_64 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP - select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC + select DCACHE_WORD_ACCESS select GENERIC_SMP_IDLE_THREAD config INSTRUCTION_DECODER diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3e48b26f67d..277418ff8b5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -134,6 +134,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +archscripts: + $(Q)$(MAKE) $(build)=arch/x86/tools relocs + ### # Syscall table generation diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fd55a2ff3ad..e398bb5d63b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +targets += vmlinux.bin.all vmlinux.relocs -targets += vmlinux.bin.all vmlinux.relocs relocs -hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs - +CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 4824fb45560..07b3a68d2d2 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -294,8 +294,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); - current->mm->context.ia32_compat = 1; + set_personality_ia32(false); setup_new_exec(bprm); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 47d99934580..5fb9bbbd2f1 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,45 +1,101 @@ -#ifndef _ASM_X86_IRQ_REMAPPING_H -#define _ASM_X86_IRQ_REMAPPING_H +/* + * Copyright (C) 2012 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * This header file contains the interface of the interrupt remapping code to + * the x86 interrupt management code. + */ -#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) +#ifndef __X86_IRQ_REMAPPING_H +#define __X86_IRQ_REMAPPING_H + +#include <asm/io_apic.h> #ifdef CONFIG_IRQ_REMAP -static void irq_remap_modify_chip_defaults(struct irq_chip *chip); -static inline void prepare_irte(struct irte *irte, int vector, - unsigned int dest) + +extern int irq_remapping_enabled; + +extern void setup_irq_remapping_ops(void); +extern int irq_remapping_supported(void); +extern int irq_remapping_prepare(void); +extern int irq_remapping_enable(void); +extern void irq_remapping_disable(void); +extern int irq_remapping_reenable(int); +extern int irq_remap_enable_fault_handling(void); +extern int setup_ioapic_remapped_entry(int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, + int vector, + struct io_apic_irq_attr *attr); +extern int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); +extern void free_remapped_irq(int irq); +extern void compose_remapped_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id); +extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); +extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); + +#else /* CONFIG_IRQ_REMAP */ + +#define irq_remapping_enabled 0 + +static inline void setup_irq_remapping_ops(void) { } +static inline int irq_remapping_supported(void) { return 0; } +static inline int irq_remapping_prepare(void) { return -ENODEV; } +static inline int irq_remapping_enable(void) { return -ENODEV; } +static inline void irq_remapping_disable(void) { } +static inline int irq_remapping_reenable(int eim) { return -ENODEV; } +static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; } +static inline int setup_ioapic_remapped_entry(int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, + int vector, + struct io_apic_irq_attr *attr) +{ + return -ENODEV; +} +static inline int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force) { - memset(irte, 0, sizeof(*irte)); - - irte->present = 1; - irte->dst_mode = apic->irq_dest_mode; - /* - * Trigger mode in the IRTE will always be edge, and for IO-APIC, the - * actual level or edge trigger will be setup in the IO-APIC - * RTE. This will help simplify level triggered irq migration. - * For more details, see the comments (in io_apic.c) explainig IO-APIC - * irq migration in the presence of interrupt-remapping. - */ - irte->trigger_mode = 0; - irte->dlvry_mode = apic->irq_delivery_mode; - irte->vector = vector; - irte->dest_id = IRTE_DEST(dest); - irte->redir_hint = 1; + return 0; } -static inline bool irq_remapped(struct irq_cfg *cfg) +static inline void free_remapped_irq(int irq) { } +static inline void compose_remapped_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) { - return cfg->irq_2_iommu.iommu != NULL; } -#else -static void prepare_irte(struct irte *irte, int vector, unsigned int dest) +static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { + return -ENODEV; } -static inline bool irq_remapped(struct irq_cfg *cfg) +static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) { - return false; + return -ENODEV; } -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { + return -ENODEV; } -#endif +#endif /* CONFIG_IRQ_REMAP */ -#endif /* _ASM_X86_IRQ_REMAPPING_H */ +#endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c3767cfa..183922e13de 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -170,6 +170,9 @@ static inline int kvm_para_available(void) unsigned int eax, ebx, ecx, edx; char signature[13]; + if (boot_cpu_data.cpuid_level < 0) + return 0; /* So we don't blow up on old processors */ + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); memcpy(signature + 0, &ebx, 4); memcpy(signature + 4, &ecx, 4); diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index fd3f9f18cf3..0e3793b821e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void); enum { NMI_LOCAL=0, NMI_UNKNOWN, + NMI_SERR, + NMI_IO_CHECK, NMI_MAX }; @@ -35,8 +37,24 @@ enum { typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); -int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, - const char *); +struct nmiaction { + struct list_head list; + nmi_handler_t handler; + unsigned long flags; + const char *name; +}; + +#define register_nmi_handler(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + +int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h index e0b1d9bbcbc..7b3ddc34858 100644 --- a/arch/x86/include/asm/stat.h +++ b/arch/x86/include/asm/stat.h @@ -25,6 +25,12 @@ struct stat { unsigned long __unused5; }; +/* We don't need to memset the whole thing just to initialize the padding */ +#define INIT_STRUCT_STAT_PADDING(st) do { \ + st.__unused4 = 0; \ + st.__unused5 = 0; \ +} while (0) + #define STAT64_HAS_BROKEN_ST_INO 1 /* This matches struct stat64 in glibc2.1, hence the absolutely @@ -63,6 +69,12 @@ struct stat64 { unsigned long long st_ino; }; +/* We don't need to memset the whole thing just to initialize the padding */ +#define INIT_STRUCT_STAT64_PADDING(st) do { \ + memset(&st.__pad0, 0, sizeof(st.__pad0)); \ + memset(&st.__pad3, 0, sizeof(st.__pad3)); \ +} while (0) + #else /* __i386__ */ struct stat { @@ -87,6 +99,15 @@ struct stat { unsigned long st_ctime_nsec; long __unused[3]; }; + +/* We don't need to memset the whole thing just to initialize the padding */ +#define INIT_STRUCT_STAT_PADDING(st) do { \ + st.__pad0 = 0; \ + st.__unused[0] = 0; \ + st.__unused[1] = 0; \ + st.__unused[2] = 0; \ +} while (0) + #endif /* for 32bit emulation and 32 bit kernels */ diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 6fe6767b712..e58f03b206c 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -43,4 +43,37 @@ static inline unsigned long has_zero(unsigned long a) return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); } +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, dummy; + + asm( + "1:\tmov %2,%0\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3:\t" + "lea %2,%1\n\t" + "and %3,%1\n\t" + "mov (%1),%0\n\t" + "leal %2,%%ecx\n\t" + "andl %4,%%ecx\n\t" + "shll $3,%%ecx\n\t" + "shr %%cl,%0\n\t" + "jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + :"=&r" (ret),"=&c" (dummy) + :"m" (*(unsigned long *)addr), + "i" (-sizeof(unsigned long)), + "i" (sizeof(unsigned long)-1)); + return ret; +} + #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index a415b1f4436..7c439fe4941 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include <acpi/processor.h> -static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index edc24480469..3722179a49d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -35,6 +35,7 @@ #include <linux/smp.h> #include <linux/mm.h> +#include <asm/irq_remapping.h> #include <asm/perf_event.h> #include <asm/x86_init.h> #include <asm/pgalloc.h> @@ -1441,8 +1442,8 @@ void __init bsp_end_local_APIC_setup(void) * Now that local APIC setup is completed for BP, configure the fault * handling for interrupt remapping. */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); + if (irq_remapping_enabled) + irq_remap_enable_fault_handling(); } @@ -1517,7 +1518,7 @@ void enable_x2apic(void) int __init enable_IR(void) { #ifdef CONFIG_IRQ_REMAP - if (!intr_remapping_supported()) { + if (!irq_remapping_supported()) { pr_debug("intr-remapping not supported\n"); return -1; } @@ -1528,7 +1529,7 @@ int __init enable_IR(void) return -1; } - return enable_intr_remapping(); + return irq_remapping_enable(); #endif return -1; } @@ -1537,10 +1538,13 @@ void __init enable_IR_x2apic(void) { unsigned long flags; int ret, x2apic_enabled = 0; - int dmar_table_init_ret; + int hardware_init_ret; - dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret && !x2apic_supported()) + /* Make sure irq_remap_ops are initialized */ + setup_irq_remapping_ops(); + + hardware_init_ret = irq_remapping_prepare(); + if (hardware_init_ret && !x2apic_supported()) return; ret = save_ioapic_entries(); @@ -1556,7 +1560,7 @@ void __init enable_IR_x2apic(void) if (x2apic_preenabled && nox2apic) disable_x2apic(); - if (dmar_table_init_ret) + if (hardware_init_ret) ret = -1; else ret = enable_IR(); @@ -2176,8 +2180,8 @@ static int lapic_suspend(void) local_irq_save(flags); disable_local_APIC(); - if (intr_remapping_enabled) - disable_intr_remapping(); + if (irq_remapping_enabled) + irq_remapping_disable(); local_irq_restore(flags); return 0; @@ -2193,7 +2197,7 @@ static void lapic_resume(void) return; local_irq_save(flags); - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { /* * IO-APIC and PIC have their own resume routines. * We just mask them here to make sure the interrupt @@ -2245,8 +2249,8 @@ static void lapic_resume(void) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (intr_remapping_enabled) - reenable_intr_remapping(x2apic_mode); + if (irq_remapping_enabled) + irq_remapping_reenable(x2apic_mode); local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e88300d8e80..ef0648cd708 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -86,6 +86,22 @@ void __init set_io_apic_ops(const struct io_apic_ops *ops) io_apic_ops = *ops; } +#ifdef CONFIG_IRQ_REMAP +static void irq_remap_modify_chip_defaults(struct irq_chip *chip); +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return cfg->irq_2_iommu.iommu != NULL; +} +#else +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return false; +} +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} +#endif + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -1361,77 +1377,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi ? "fasteoi" : "edge"); } - -static int setup_ir_ioapic_entry(int irq, - struct IR_IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) -{ - int index; - struct irte irte; - int ioapic_id = mpc_ioapic_id(attr->ioapic); - struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); - - if (!iommu) { - pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); - return -ENODEV; - } - - index = alloc_irte(iommu, irq, 1); - if (index < 0) { - pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); - return -ENOMEM; - } - - prepare_irte(&irte, vector, destination); - - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, ioapic_id); - - modify_irte(irq, &irte); - - apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " - "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " - "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " - "Avail:%X Vector:%02X Dest:%08X " - "SID:%04X SQ:%X SVT:%X)\n", - attr->ioapic, irte.present, irte.fpd, irte.dst_mode, - irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, - irte.avail, irte.vector, irte.dest_id, - irte.sid, irte.sq, irte.svt); - - memset(entry, 0, sizeof(*entry)); - - entry->index2 = (index >> 15) & 0x1; - entry->zero = 0; - entry->format = 1; - entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - entry->vector = attr->ioapic_pin; - entry->mask = 0; /* enable IRQ */ - entry->trigger = attr->trigger; - entry->polarity = attr->polarity; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (attr->trigger) - entry->mask = 1; - - return 0; -} - static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int vector, struct io_apic_irq_attr *attr) { - if (intr_remapping_enabled) - return setup_ir_ioapic_entry(irq, - (struct IR_IO_APIC_route_entry *)entry, - destination, vector, attr); + if (irq_remapping_enabled) + return setup_ioapic_remapped_entry(irq, entry, destination, + vector, attr); memset(entry, 0, sizeof(*entry)); @@ -1588,7 +1540,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, { struct IO_APIC_route_entry entry; - if (intr_remapping_enabled) + if (irq_remapping_enabled) return; memset(&entry, 0, sizeof(entry)); @@ -1674,7 +1626,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" " Pol Stat Indx2 Zero Vect:\n"); } else { @@ -1683,7 +1635,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) } for (i = 0; i <= reg_01.bits.entries; i++) { - if (intr_remapping_enabled) { + if (irq_remapping_enabled) { struct IO_APIC_route_entry entry; struct IR_IO_APIC_route_entry *ir_entry; @@ -2050,7 +2002,7 @@ void disable_IO_APIC(void) * IOAPIC RTE as well as interrupt-remapping table entry). * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { + if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -2074,7 +2026,7 @@ void disable_IO_APIC(void) * Use virtual wire A mode when interrupt remapping is enabled. */ if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!intr_remapping_enabled && + disconnect_bsp_APIC(!irq_remapping_enabled && ioapic_i8259.pin != -1); } @@ -2390,71 +2342,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -#ifdef CONFIG_IRQ_REMAP - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For both level and edge triggered, irq migration is a simple atomic - * update(of vector and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we eliminate the io-apic RTE modification (with the - * updated vector information), by using a virtual vector (io-apic pin number). - * Real vector that is used for interrupting cpu will be coming from - * the interrupt-remapping table entry. - * - * As the migration is a simple atomic update of IRTE, the same mechanism - * is used to migrate MSI irq's in the presence of interrupt-remapping. - */ -static int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - if (get_irte(irq, &irte)) - return -EBUSY; - - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Atomically updates the IRTE with the new destination, vector - * and flushes the interrupt entry cache. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - cpumask_copy(data->affinity, mask); - return 0; -} - -#else -static inline int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - return 0; -} -#endif - asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -2699,7 +2586,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_eoi = ir_ack_apic_level; #ifdef CONFIG_SMP - chip->irq_set_affinity = ir_ioapic_set_affinity; + chip->irq_set_affinity = set_remapped_irq_affinity; #endif } #endif /* CONFIG_IRQ_REMAP */ @@ -2912,7 +2799,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (intr_remapping_enabled) + if (irq_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; @@ -2945,7 +2832,7 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (intr_remapping_enabled) + if (irq_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); @@ -3169,7 +3056,7 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); if (irq_remapped(cfg)) - free_irte(irq); + free_remapped_irq(irq); raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); @@ -3198,54 +3085,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); if (irq_remapped(cfg)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - prepare_irte(&irte, cfg->vector, dest); - - /* Set source-id of interrupt request */ - if (pdev) - set_msi_sid(&irte, pdev); - else - set_hpet_sid(&irte, hpet_id); - - modify_irte(irq, &irte); + compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); + return err; + } + if (x2apic_enabled()) + msg->address_hi = MSI_ADDR_BASE_HI | + MSI_ADDR_EXT_DEST_ID(dest); + else msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else { - if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } return err; } @@ -3288,33 +3155,6 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} - static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { struct irq_chip *chip = &msi_chip; @@ -3345,7 +3185,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; - struct intel_iommu *iommu = NULL; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) @@ -3359,7 +3198,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (irq == 0) return -1; irq_want = irq + 1; - if (!intr_remapping_enabled) + if (!irq_remapping_enabled) goto no_ir; if (!sub_handle) { @@ -3367,23 +3206,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) * allocate the consecutive block of IRTE's * for 'nvec' */ - index = msi_alloc_irte(dev, irq, nvec); + index = msi_alloc_remapped_irq(dev, irq, nvec); if (index < 0) { ret = index; goto error; } } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; + ret = msi_setup_remapped_irq(dev, irq, index, + sub_handle); + if (ret < 0) goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); } no_ir: ret = setup_msi_irq(dev, msidesc, irq); @@ -3501,15 +3333,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) struct msi_msg msg; int ret; - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_hpet_to_ir(id); - int index; - - if (!iommu) - return -1; - - index = alloc_irte(iommu, irq, 1); - if (index < 0) + if (irq_remapping_enabled) { + if (!setup_hpet_msi_remapped(irq, id)) return -1; } @@ -3888,8 +3713,8 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - if (intr_remapping_enabled) - ir_ioapic_set_affinity(idata, mask, false); + if (irq_remapping_enabled) + set_remapped_irq_affinity(idata, mask, false); else ioapic_set_affinity(idata, mask, false); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1c67ca100e4..146bb6218ee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -580,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* re-enable TopologyExtensions if switched off by BIOS */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + !cpu_has(c, X86_FEATURE_TOPOEXT)) { + u64 val; + + if (!rdmsrl_amd_safe(0xc0011005, &val)) { + val |= 1ULL << 54; + wrmsrl_amd_safe(0xc0011005, val); + rdmsrl(0xc0011005, val); + if (val & (1ULL << 54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + printk(KERN_INFO FW_INFO "CPU: Re-enabling " + "disabled Topology Extensions Support\n"); + } + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d086a09c087..11c9166c333 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -945,9 +945,10 @@ struct mce_info { atomic_t inuse; struct task_struct *t; __u64 paddr; + int restartable; } mce_info[MCE_INFO_MAX]; -static void mce_save_info(__u64 addr) +static void mce_save_info(__u64 addr, int c) { struct mce_info *mi; @@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr) if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { mi->t = current; mi->paddr = addr; + mi->restartable = c; return; } } @@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { /* schedule action before return to userland */ - mce_save_info(m.addr); + mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); set_thread_flag(TIF_MCE_NOTIFY); } else if (kill_it) { force_sig(SIGBUS, current); @@ -1179,7 +1181,13 @@ void mce_notify_process(void) pr_err("Uncorrected hardware memory error in user-access at %llx", mi->paddr); - if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || + mi->restartable == 0) { pr_err("Memory error not recovered"); force_sig(SIGBUS, current); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b8ba6e4a27e..e554e5ad2fe 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -79,7 +79,6 @@ struct kvm_task_sleep_node { u32 token; int cpu; bool halted; - struct mm_struct *mm; }; static struct kvm_task_sleep_head { @@ -126,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.mm = current->active_mm; n.halted = idle || preempt_count() > 1; - atomic_inc(&n.mm->mm_count); init_waitqueue_head(&n.wq); hlist_add_head(&n.link, &b->list); spin_unlock(&b->lock); @@ -161,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { hlist_del_init(&n->link); - if (!n->mm) - return; - mmdrop(n->mm); if (n->halted) smp_send_reschedule(n->cpu); else if (waitqueue_active(&n->wq)) @@ -207,7 +201,7 @@ again: * async PF was not yet handled. * Add dummy entry for the token. */ - n = kmalloc(sizeof(*n), GFP_ATOMIC); + n = kzalloc(sizeof(*n), GFP_ATOMIC); if (!n) { /* * Allocation failed! Busy wait while other cpu @@ -219,7 +213,6 @@ again: } n->token = token; n->cpu = smp_processor_id(); - n->mm = NULL; init_waitqueue_head(&n->wq); hlist_add_head(&n->link, &b->list); } else diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3ca42d0e43a..0327e2b3c40 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -147,12 +147,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - pr_err("CPU%d not a capable Intel processor\n", cpu_num); - return -1; - } - csig->sig = cpuid_eax(0x00000001); if ((c->x86_model >= 5) || (c->x86 > 6)) { @@ -463,6 +457,14 @@ static struct microcode_ops microcode_intel_ops = { struct microcode_ops * __init init_intel_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + pr_err("Intel CPU family 0x%x not supported\n", c->x86); + return NULL; + } + return µcode_intel_ops; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 47acaf31916..585be4bd71a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -31,14 +31,6 @@ #include <asm/nmi.h> #include <asm/x86_init.h> -#define NMI_MAX_NAMELEN 16 -struct nmiaction { - struct list_head list; - nmi_handler_t handler; - unsigned int flags; - char *name; -}; - struct nmi_desc { spinlock_t lock; struct list_head head; @@ -54,6 +46,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] = .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), .head = LIST_HEAD_INIT(nmi_desc[1].head), }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock), + .head = LIST_HEAD_INIT(nmi_desc[2].head), + }, + { + .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock), + .head = LIST_HEAD_INIT(nmi_desc[3].head), + }, }; @@ -107,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, return handled; } -static int __setup_nmi(unsigned int type, struct nmiaction *action) +int __register_nmi_handler(unsigned int type, struct nmiaction *action) { struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags; + if (!action->handler) + return -EINVAL; + spin_lock_irqsave(&desc->lock, flags); /* @@ -120,6 +123,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) * to manage expectations */ WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); + WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); /* * some handlers need to be executed first otherwise a fake @@ -133,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) spin_unlock_irqrestore(&desc->lock, flags); return 0; } +EXPORT_SYMBOL(__register_nmi_handler); -static struct nmiaction *__free_nmi(unsigned int type, const char *name) +void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *n; @@ -157,61 +163,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name) spin_unlock_irqrestore(&desc->lock, flags); synchronize_rcu(); - return (n); } - -int register_nmi_handler(unsigned int type, nmi_handler_t handler, - unsigned long nmiflags, const char *devname) -{ - struct nmiaction *action; - int retval = -ENOMEM; - - if (!handler) - return -EINVAL; - - action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); - if (!action) - goto fail_action; - - action->handler = handler; - action->flags = nmiflags; - action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); - if (!action->name) - goto fail_action_name; - - retval = __setup_nmi(type, action); - - if (retval) - goto fail_setup_nmi; - - return retval; - -fail_setup_nmi: - kfree(action->name); -fail_action_name: - kfree(action); -fail_action: - - return retval; -} -EXPORT_SYMBOL_GPL(register_nmi_handler); - -void unregister_nmi_handler(unsigned int type, const char *name) -{ - struct nmiaction *a; - - a = __free_nmi(type, name); - if (a) { - kfree(a->name); - kfree(a); - } -} - EXPORT_SYMBOL_GPL(unregister_nmi_handler); static notrace __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_SERR, regs, false)) + return; + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", reason, smp_processor_id()); @@ -241,6 +202,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; + /* check to see if anyone registered against these types of errors */ + if (nmi_handle(NMI_IO_CHECK, regs, false)) + return; + pr_emerg( "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", reason, smp_processor_id()); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 733ca39f367..43d8b48b23e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -423,6 +423,7 @@ void set_personality_ia32(bool x32) current_thread_info()->status |= TS_COMPAT; } } +EXPORT_SYMBOL_GPL(set_personality_ia32); unsigned long get_wchan(struct task_struct *p) { diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 71f4727da37..5a98aa27218 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -185,10 +185,22 @@ void __init setup_per_cpu_areas(void) #endif rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_PAGE) { - const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; + size_t atom_size; + /* + * On 64bit, use PMD_SIZE for atom_size so that embedded + * percpu areas are aligned to PMD. This, in the future, + * can also allow using PMD mappings in vmalloc area. Use + * PAGE_SIZE on 32bit as vmalloc space is highly contended + * and large vmalloc area allocs can easily fail. + */ +#ifdef CONFIG_X86_64 + atom_size = PMD_SIZE; +#else + atom_size = PAGE_SIZE; +#endif rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, atom_size, pcpu_cpu_distance, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 91a5e989abc..185a2b823a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6581,6 +6581,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, kvm_inject_page_fault(vcpu, &fault); } vcpu->arch.apf.halted = false; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index d6ae30bbd7b..2e4e4b02c37 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -44,13 +44,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) } EXPORT_SYMBOL_GPL(copy_from_user_nmi); -static inline unsigned long count_bytes(unsigned long mask) -{ - mask = (mask - 1) & ~mask; - mask >>= 7; - return count_masked_bytes(mask); -} - /* * Do a strncpy, return length of string without final '\0'. * 'count' is the user-supplied count (return 'count' if we @@ -69,16 +62,19 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long max = count; while (max >= sizeof(unsigned long)) { - unsigned long c; + unsigned long c, mask; /* Fall back to byte-at-a-time if we get a page fault */ if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) break; - /* This can write a few bytes past the NUL character, but that's ok */ + mask = has_zero(c); + if (mask) { + mask = (mask - 1) & ~mask; + mask >>= 7; + *(unsigned long *)(dst+res) = c & mask; + return res + count_masked_bytes(mask); + } *(unsigned long *)(dst+res) = c; - c = has_zero(c); - if (c) - return res + count_bytes(c); res += sizeof(unsigned long); max -= sizeof(unsigned long); } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ed2835e148b..fc09c2754e0 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -9,11 +9,11 @@ struct pci_root_info { struct acpi_device *bridge; - char *name; + char name[16]; unsigned int res_num; struct resource *res; - struct list_head *resources; int busnum; + struct pci_sysdata sd; }; static bool pci_use_crs = true; @@ -245,13 +245,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } -static bool resource_contains(struct resource *res, resource_size_t point) -{ - if (res->start <= point && point <= res->end) - return true; - return false; -} - static void coalesce_windows(struct pci_root_info *info, unsigned long type) { int i, j; @@ -272,10 +265,7 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type) * our resources no longer match the ACPI _CRS, but * the kernel resource tree doesn't allow overlaps. */ - if (resource_contains(res1, res2->start) || - resource_contains(res1, res2->end) || - resource_contains(res2, res1->start) || - resource_contains(res2, res1->end)) { + if (resource_overlaps(res1, res2)) { res1->start = min(res1->start, res2->start); res1->end = max(res1->end, res2->end); dev_info(&info->bridge->dev, @@ -287,7 +277,8 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type) } } -static void add_resources(struct pci_root_info *info) +static void add_resources(struct pci_root_info *info, + struct list_head *resources) { int i; struct resource *res, *root, *conflict; @@ -311,53 +302,74 @@ static void add_resources(struct pci_root_info *info) "ignoring host bridge window %pR (conflicts with %s %pR)\n", res, conflict->name, conflict); else - pci_add_resource(info->resources, res); + pci_add_resource(resources, res); } } +static void free_pci_root_info_res(struct pci_root_info *info) +{ + kfree(info->res); + info->res = NULL; + info->res_num = 0; +} + +static void __release_pci_root_info(struct pci_root_info *info) +{ + int i; + struct resource *res; + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (!res->parent) + continue; + + if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + continue; + + release_resource(res); + } + + free_pci_root_info_res(info); + + kfree(info); +} +static void release_pci_root_info(struct pci_host_bridge *bridge) +{ + struct pci_root_info *info = bridge->release_data; + + __release_pci_root_info(info); +} + static void -get_current_resources(struct acpi_device *device, int busnum, - int domain, struct list_head *resources) +probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, + int busnum, int domain) { - struct pci_root_info info; size_t size; - info.bridge = device; - info.res_num = 0; - info.resources = resources; + info->bridge = device; + info->res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, - &info); - if (!info.res_num) + info); + if (!info->res_num) return; - size = sizeof(*info.res) * info.res_num; - info.res = kmalloc(size, GFP_KERNEL); - if (!info.res) + size = sizeof(*info->res) * info->res_num; + info->res_num = 0; + info->res = kmalloc(size, GFP_KERNEL); + if (!info->res) return; - info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); - if (!info.name) - goto name_alloc_fail; + sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); - info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, - &info); - - if (pci_use_crs) { - add_resources(&info); - - return; - } - - kfree(info.name); - -name_alloc_fail: - kfree(info.res); + info); } struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) { struct acpi_device *device = root->device; + struct pci_root_info *info = NULL; int domain = root->segment; int busnum = root->secondary.start; LIST_HEAD(resources); @@ -389,17 +401,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) if (node != -1 && !node_online(node)) node = -1; - /* Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (out of memory)\n", domain, busnum); return NULL; } + sd = &info->sd; sd->domain = domain; sd->node = node; /* @@ -413,22 +422,32 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) * be replaced by sd. */ memcpy(bus->sysdata, sd, sizeof(*sd)); - kfree(sd); + kfree(info); } else { - get_current_resources(device, busnum, domain, &resources); + probe_pci_root_info(info, device, busnum, domain); /* * _CRS with no apertures is normal, so only fall back to * defaults or native bridge info if we're ignoring _CRS. */ - if (!pci_use_crs) + if (pci_use_crs) + add_resources(info, &resources); + else { + free_pci_root_info_res(info); x86_pci_root_bus_resources(busnum, &resources); + } + bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); - if (bus) + if (bus) { bus->subordinate = pci_scan_child_bus(bus); - else + pci_set_host_bridge_release( + to_pci_host_bridge(bus->bridge), + release_pci_root_info, info); + } else { pci_free_resource_list(&resources); + __release_pci_root_info(info); + } } /* After the PCI-E bus has been walked and all devices discovered, @@ -445,9 +464,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) } } - if (!bus) - kfree(sd); - if (bus && node != -1) { #ifdef CONFIG_ACPI_NUMA if (pxm >= 0) diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 0567df3890e..5aed49bff05 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -32,6 +32,27 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { #define RANGE_NUM 16 +static struct pci_root_info __init *find_pci_root_info(int node, int link) +{ + struct pci_root_info *info; + + /* find the position */ + list_for_each_entry(info, &pci_root_infos, list) + if (info->node == node && info->link == link) + return info; + + return NULL; +} + +static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node) +{ +#ifdef CONFIG_NUMA + int j; + + for (j = min_bus; j <= max_bus; j++) + set_mp_bus_to_node(j, node); +#endif +} /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus @@ -41,7 +62,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { static int __init early_fill_mp_bus_info(void) { int i; - int j; unsigned bus; unsigned slot; int node; @@ -50,7 +70,6 @@ static int __init early_fill_mp_bus_info(void) int def_link; struct pci_root_info *info; u32 reg; - struct resource *res; u64 start; u64 end; struct range range[RANGE_NUM]; @@ -86,7 +105,6 @@ static int __init early_fill_mp_bus_info(void) if (!found) return 0; - pci_root_num = 0; for (i = 0; i < 4; i++) { int min_bus; int max_bus; @@ -99,19 +117,11 @@ static int __init early_fill_mp_bus_info(void) min_bus = (reg >> 16) & 0xff; max_bus = (reg >> 24) & 0xff; node = (reg >> 4) & 0x07; -#ifdef CONFIG_NUMA - for (j = min_bus; j <= max_bus; j++) - set_mp_bus_to_node(j, node); -#endif + set_mp_bus_range_to_node(min_bus, max_bus, node); link = (reg >> 8) & 0x03; - info = &pci_root_info[pci_root_num]; - info->bus_min = min_bus; - info->bus_max = max_bus; - info->node = node; - info->link = link; + info = alloc_pci_root_info(min_bus, max_bus, node, link); sprintf(info->name, "PCI Bus #%02x", min_bus); - pci_root_num++; } /* get the default node and link for left over res */ @@ -134,16 +144,10 @@ static int __init early_fill_mp_bus_info(void) link = (reg >> 4) & 0x03; end = (reg & 0xfff000) | 0xfff; - /* find the position */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == node && info->link == link) - break; - } - if (j == pci_root_num) + info = find_pci_root_info(node, link); + if (!info) continue; /* not found */ - info = &pci_root_info[j]; printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", node, link, start, end); @@ -155,13 +159,8 @@ static int __init early_fill_mp_bus_info(void) } /* add left over io port range to def node/link, [0, 0xffff] */ /* find the position */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == def_node && info->link == def_link) - break; - } - if (j < pci_root_num) { - info = &pci_root_info[j]; + info = find_pci_root_info(def_node, def_link); + if (info) { for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; @@ -214,16 +213,10 @@ static int __init early_fill_mp_bus_info(void) end <<= 8; end |= 0xffff; - /* find the position */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == node && info->link == link) - break; - } - if (j == pci_root_num) - continue; /* not found */ + info = find_pci_root_info(node, link); - info = &pci_root_info[j]; + if (!info) + continue; printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", node, link, start, end); @@ -291,14 +284,8 @@ static int __init early_fill_mp_bus_info(void) * add left over mmio range to def node/link ? * that is tricky, just record range in from start_min to 4G */ - for (j = 0; j < pci_root_num; j++) { - info = &pci_root_info[j]; - if (info->node == def_node && info->link == def_link) - break; - } - if (j < pci_root_num) { - info = &pci_root_info[j]; - + info = find_pci_root_info(def_node, def_link); + if (info) { for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; @@ -309,20 +296,16 @@ static int __init early_fill_mp_bus_info(void) } } - for (i = 0; i < pci_root_num; i++) { - int res_num; + list_for_each_entry(info, &pci_root_infos, list) { int busnum; + struct pci_root_res *root_res; - info = &pci_root_info[i]; - res_num = info->res_num; busnum = info->bus_min; printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", info->bus_min, info->bus_max, info->node, info->link); - for (j = 0; j < res_num; j++) { - res = &info->res[j]; - printk(KERN_DEBUG "bus: %02x index %x %pR\n", - busnum, j, res); - } + list_for_each_entry(root_res, &info->resources, list) + printk(KERN_DEBUG "bus: %02x %pR\n", + busnum, &root_res->res); } return 0; diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index f3a7c569a40..614392ced7d 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -22,19 +22,15 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) { struct pci_root_info *info; + struct pci_root_res *root_res; struct resource res; u16 word1, word2; u8 fbus, lbus; - int i; - - info = &pci_root_info[pci_root_num]; - pci_root_num++; /* read the PCI bus numbers */ fbus = read_pci_config_byte(bus, slot, func, 0x44); lbus = read_pci_config_byte(bus, slot, func, 0x45); - info->bus_min = fbus; - info->bus_max = lbus; + info = alloc_pci_root_info(fbus, lbus, 0, 0); /* * Add the legacy IDE ports on bus 0 @@ -86,8 +82,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func) res.flags = IORESOURCE_BUS; printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); - for (i = 0; i < info->res_num; i++) - printk(KERN_INFO "host bridge window %pR\n", &info->res[i]); + list_for_each_entry(root_res, &info->resources, list) + printk(KERN_INFO "host bridge window %pR\n", &root_res->res); } static int __init broadcom_postcore_init(void) diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index fd3f65510e9..306579f7d0f 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -4,35 +4,38 @@ #include "bus_numa.h" -int pci_root_num; -struct pci_root_info pci_root_info[PCI_ROOT_NR]; +LIST_HEAD(pci_root_infos); -void x86_pci_root_bus_resources(int bus, struct list_head *resources) +static struct pci_root_info *x86_find_pci_root_info(int bus) { - int i; - int j; struct pci_root_info *info; - if (!pci_root_num) - goto default_resources; + if (list_empty(&pci_root_infos)) + return NULL; - for (i = 0; i < pci_root_num; i++) { - if (pci_root_info[i].bus_min == bus) - break; - } + list_for_each_entry(info, &pci_root_infos, list) + if (info->bus_min == bus) + return info; + + return NULL; +} - if (i == pci_root_num) +void x86_pci_root_bus_resources(int bus, struct list_head *resources) +{ + struct pci_root_info *info = x86_find_pci_root_info(bus); + struct pci_root_res *root_res; + + if (!info) goto default_resources; printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", bus); - info = &pci_root_info[i]; - for (j = 0; j < info->res_num; j++) { + list_for_each_entry(root_res, &info->resources, list) { struct resource *res; struct resource *root; - res = &info->res[j]; + res = &root_res->res; pci_add_resource(resources, res); if (res->flags & IORESOURCE_IO) root = &ioport_resource; @@ -53,11 +56,32 @@ default_resources: pci_add_resource(resources, &iomem_resource); } +struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max, + int node, int link) +{ + struct pci_root_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return info; + + INIT_LIST_HEAD(&info->resources); + info->bus_min = bus_min; + info->bus_max = bus_max; + info->node = node; + info->link = link; + + list_add_tail(&info->list, &pci_root_infos); + + return info; +} + void __devinit update_res(struct pci_root_info *info, resource_size_t start, resource_size_t end, unsigned long flags, int merge) { - int i; struct resource *res; + struct pci_root_res *root_res; if (start > end) return; @@ -69,11 +93,11 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start, goto addit; /* try to merge it with old one */ - for (i = 0; i < info->res_num; i++) { + list_for_each_entry(root_res, &info->resources, list) { resource_size_t final_start, final_end; resource_size_t common_start, common_end; - res = &info->res[i]; + res = &root_res->res; if (res->flags != flags) continue; @@ -93,14 +117,15 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start, addit: /* need to add that */ - if (info->res_num >= RES_NUM) + root_res = kzalloc(sizeof(*root_res), GFP_KERNEL); + if (!root_res) return; - res = &info->res[info->res_num]; + res = &root_res->res; res->name = info->name; res->flags = flags; res->start = start; res->end = end; - res->child = NULL; - info->res_num++; + + list_add_tail(&root_res->list, &info->resources); } diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 804a4b40c31..226a466b2b2 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h @@ -4,22 +4,24 @@ * sub bus (transparent) will use entres from 3 to store extra from * root, so need to make sure we have enough slot there. */ -#define RES_NUM 16 +struct pci_root_res { + struct list_head list; + struct resource res; +}; + struct pci_root_info { + struct list_head list; char name[12]; - unsigned int res_num; - struct resource res[RES_NUM]; + struct list_head resources; int bus_min; int bus_max; int node; int link; }; -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -extern int pci_root_num; -extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; - +extern struct list_head pci_root_infos; +struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max, + int node, int link); extern void update_res(struct pci_root_info *info, resource_size_t start, resource_size_t end, unsigned long flags, int merge); #endif diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 323481e06ef..0ad990a20d4 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -11,6 +11,7 @@ #include <linux/dmi.h> #include <linux/slab.h> +#include <asm-generic/pci-bridge.h> #include <asm/acpi.h> #include <asm/segment.h> #include <asm/io.h> @@ -229,6 +230,14 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d) } #endif +static int __devinit set_scan_all(const struct dmi_system_id *d) +{ + printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n", + d->ident); + pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); + return 0; +} + static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { #ifdef __i386__ /* @@ -420,6 +429,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), }, }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ftServer"), + }, + }, {} }; @@ -430,9 +446,7 @@ void __init dmi_check_pciprobe(void) struct pci_bus * __devinit pcibios_scan_root(int busnum) { - LIST_HEAD(resources); struct pci_bus *bus = NULL; - struct pci_sysdata *sd; while ((bus = pci_find_next_bus(bus)) != NULL) { if (bus->number == busnum) { @@ -441,28 +455,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) } } - /* Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); - return NULL; - } - - sd->node = get_mp_bus_to_node(busnum); - - printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); - x86_pci_root_bus_resources(busnum, &resources); - bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); - if (!bus) { - pci_free_resource_list(&resources); - kfree(sd); - } - - return bus; + return pci_scan_bus_on_node(busnum, &pci_root_ops, + get_mp_bus_to_node(busnum)); } + void __init pcibios_set_cache_line_size(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -656,6 +652,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, } sd->node = node; x86_pci_root_bus_resources(busno, &resources); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno); bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); if (!bus) { pci_free_resource_list(&resources); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 831971e731f..dd8ca6f7223 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -57,7 +57,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) { struct pcibios_fwaddrmap *map; - WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock)); + WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); list_for_each_entry(map, &pcibios_fwaddrmappings, list) if (map->dev == dev) diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 66d377e334f..646e3b5b4bb 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -63,7 +63,7 @@ static struct gpio_led net5501_leds[] = { .name = "net5501:1", .gpio = 6, .default_trigger = "default-on", - .active_low = 1, + .active_low = 0, }, }; diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore new file mode 100644 index 00000000000..be0ed065249 --- /dev/null +++ b/arch/x86/tools/.gitignore @@ -0,0 +1 @@ +relocs diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index d511aa97533..733057b435b 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -36,3 +36,7 @@ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include +hostprogs-y += relocs +relocs: $(obj)/relocs diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/tools/relocs.c index d3c0b027766..b43cfcd9bf4 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/tools/relocs.c @@ -18,6 +18,8 @@ static void die(char *fmt, ...); static Elf32_Ehdr ehdr; static unsigned long reloc_count, reloc_idx; static unsigned long *relocs; +static unsigned long reloc16_count, reloc16_idx; +static unsigned long *relocs16; struct section { Elf32_Shdr shdr; @@ -28,52 +30,86 @@ struct section { }; static struct section *secs; +enum symtype { + S_ABS, + S_REL, + S_SEG, + S_LIN, + S_NSYMTYPES +}; + +static const char * const sym_regex_kernel[S_NSYMTYPES] = { /* * Following symbols have been audited. There values are constant and do * not change if bzImage is loaded at a different physical address than * the address for which it has been compiled. Don't warn user about * absolute relocations present w.r.t these symbols. */ -static const char abs_sym_regex[] = + [S_ABS] = "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" "VDSO|" - "__crc_)"; -static regex_t abs_sym_regex_c; -static int is_abs_reloc(const char *sym_name) -{ - return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0); -} + "__crc_)", /* * These symbols are known to be relative, even if the linker marks them * as absolute (typically defined outside any section in the linker script.) */ -static const char rel_sym_regex[] = - "^_end$"; -static regex_t rel_sym_regex_c; -static int is_rel_reloc(const char *sym_name) + [S_REL] = + "^(__init_(begin|end)|" + "__x86_cpu_dev_(start|end)|" + "(__parainstructions|__alt_instructions)(|_end)|" + "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" + "_end)$" +}; + + +static const char * const sym_regex_realmode[S_NSYMTYPES] = { +/* + * These are 16-bit segment symbols when compiling 16-bit code. + */ + [S_SEG] = + "^real_mode_seg$", + +/* + * These are offsets belonging to segments, as opposed to linear addresses, + * when compiling 16-bit code. + */ + [S_LIN] = + "^pa_", +}; + +static const char * const *sym_regex; + +static regex_t sym_regex_c[S_NSYMTYPES]; +static int is_reloc(enum symtype type, const char *sym_name) { - return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); + return sym_regex[type] && + !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); } -static void regex_init(void) +static void regex_init(int use_real_mode) { char errbuf[128]; int err; - - err = regcomp(&abs_sym_regex_c, abs_sym_regex, - REG_EXTENDED|REG_NOSUB); - if (err) { - regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); - die("%s", errbuf); - } + int i; + + if (use_real_mode) + sym_regex = sym_regex_realmode; + else + sym_regex = sym_regex_kernel; - err = regcomp(&rel_sym_regex_c, rel_sym_regex, - REG_EXTENDED|REG_NOSUB); - if (err) { - regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); - die("%s", errbuf); + for (i = 0; i < S_NSYMTYPES; i++) { + if (!sym_regex[i]) + continue; + + err = regcomp(&sym_regex_c[i], sym_regex[i], + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf); + die("%s", errbuf); + } } } @@ -154,6 +190,10 @@ static const char *rel_type(unsigned type) REL_TYPE(R_386_RELATIVE), REL_TYPE(R_386_GOTOFF), REL_TYPE(R_386_GOTPC), + REL_TYPE(R_386_8), + REL_TYPE(R_386_PC8), + REL_TYPE(R_386_16), + REL_TYPE(R_386_PC16), #undef REL_TYPE }; const char *name = "unknown type rel type name"; @@ -189,7 +229,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) name = sym_strtab + sym->st_name; } else { - name = sec_name(secs[sym->st_shndx].shdr.sh_name); + name = sec_name(sym->st_shndx); } return name; } @@ -403,13 +443,11 @@ static void print_absolute_symbols(void) for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; - Elf32_Sym *sh_symtab; int j; if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - sh_symtab = sec->symtab; sym_strtab = sec->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { Elf32_Sym *sym; @@ -474,7 +512,7 @@ static void print_absolute_relocs(void) * Before warning check if this absolute symbol * relocation is harmless. */ - if (is_abs_reloc(name) || is_rel_reloc(name)) + if (is_reloc(S_ABS, name) || is_reloc(S_REL, name)) continue; if (!printed) { @@ -498,7 +536,8 @@ static void print_absolute_relocs(void) printf("\n"); } -static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), + int use_real_mode) { int i; /* Walk through the relocations */ @@ -523,30 +562,67 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) Elf32_Rel *rel; Elf32_Sym *sym; unsigned r_type; + const char *symname; + int shn_abs; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); - /* Don't visit relocations to absolute symbols */ - if (sym->st_shndx == SHN_ABS && - !is_rel_reloc(sym_name(sym_strtab, sym))) { - continue; - } + + shn_abs = sym->st_shndx == SHN_ABS; + switch (r_type) { case R_386_NONE: case R_386_PC32: + case R_386_PC16: + case R_386_PC8: /* * NONE can be ignored and and PC relative * relocations don't need to be adjusted. */ break; + + case R_386_16: + symname = sym_name(sym_strtab, sym); + if (!use_real_mode) + goto bad; + if (shn_abs) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_SEG, symname)) + goto bad; + } else { + if (is_reloc(S_LIN, symname)) + goto bad; + else + break; + } + visit(rel, sym); + break; + case R_386_32: - /* Visit relocations that need to be adjusted */ + symname = sym_name(sym_strtab, sym); + if (shn_abs) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_REL, symname)) + goto bad; + } else { + if (use_real_mode && + !is_reloc(S_LIN, symname)) + break; + } visit(rel, sym); break; default: die("Unsupported relocation type: %s (%d)\n", rel_type(r_type), r_type); break; + bad: + symname = sym_name(sym_strtab, sym); + die("Invalid %s %s relocation: %s\n", + shn_abs ? "absolute" : "relative", + rel_type(r_type), symname); } } } @@ -554,13 +630,19 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) { - reloc_count += 1; + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + reloc16_count++; + else + reloc_count++; } static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) { /* Remember the address that needs to be adjusted. */ - relocs[reloc_idx++] = rel->r_offset; + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + relocs16[reloc16_idx++] = rel->r_offset; + else + relocs[reloc_idx++] = rel->r_offset; } static int cmp_relocs(const void *va, const void *vb) @@ -570,23 +652,41 @@ static int cmp_relocs(const void *va, const void *vb) return (*a == *b)? 0 : (*a > *b)? 1 : -1; } -static void emit_relocs(int as_text) +static int write32(unsigned int v, FILE *f) +{ + unsigned char buf[4]; + + put_unaligned_le32(v, buf); + return fwrite(buf, 1, 4, f) == 4 ? 0 : -1; +} + +static void emit_relocs(int as_text, int use_real_mode) { int i; /* Count how many relocations I have and allocate space for them. */ reloc_count = 0; - walk_relocs(count_reloc); + walk_relocs(count_reloc, use_real_mode); relocs = malloc(reloc_count * sizeof(relocs[0])); if (!relocs) { die("malloc of %d entries for relocs failed\n", reloc_count); } + + relocs16 = malloc(reloc16_count * sizeof(relocs[0])); + if (!relocs16) { + die("malloc of %d entries for relocs16 failed\n", + reloc16_count); + } /* Collect up the relocations */ reloc_idx = 0; - walk_relocs(collect_reloc); + walk_relocs(collect_reloc, use_real_mode); + + if (reloc16_count && !use_real_mode) + die("Segment relocations found but --realmode not specified\n"); /* Order the relocations for more efficient processing */ qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs); /* Print the relocations */ if (as_text) { @@ -595,58 +695,83 @@ static void emit_relocs(int as_text) */ printf(".section \".data.reloc\",\"a\"\n"); printf(".balign 4\n"); - for (i = 0; i < reloc_count; i++) { - printf("\t .long 0x%08lx\n", relocs[i]); + if (use_real_mode) { + printf("\t.long %lu\n", reloc16_count); + for (i = 0; i < reloc16_count; i++) + printf("\t.long 0x%08lx\n", relocs16[i]); + printf("\t.long %lu\n", reloc_count); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } + } else { + /* Print a stop */ + printf("\t.long 0x%08lx\n", (unsigned long)0); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } } + printf("\n"); } else { - unsigned char buf[4]; - /* Print a stop */ - fwrite("\0\0\0\0", 4, 1, stdout); - /* Now print each relocation */ - for (i = 0; i < reloc_count; i++) { - put_unaligned_le32(relocs[i], buf); - fwrite(buf, 4, 1, stdout); + if (use_real_mode) { + write32(reloc16_count, stdout); + for (i = 0; i < reloc16_count; i++) + write32(relocs16[i], stdout); + write32(reloc_count, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) + write32(relocs[i], stdout); + } else { + /* Print a stop */ + write32(0, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) { + write32(relocs[i], stdout); + } } } } static void usage(void) { - die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); + die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); } int main(int argc, char **argv) { int show_absolute_syms, show_absolute_relocs; - int as_text; + int as_text, use_real_mode; const char *fname; FILE *fp; int i; - regex_init(); - show_absolute_syms = 0; show_absolute_relocs = 0; as_text = 0; + use_real_mode = 0; fname = NULL; for (i = 1; i < argc; i++) { char *arg = argv[i]; if (*arg == '-') { - if (strcmp(argv[1], "--abs-syms") == 0) { + if (strcmp(arg, "--abs-syms") == 0) { show_absolute_syms = 1; continue; } - - if (strcmp(argv[1], "--abs-relocs") == 0) { + if (strcmp(arg, "--abs-relocs") == 0) { show_absolute_relocs = 1; continue; } - else if (strcmp(argv[1], "--text") == 0) { + if (strcmp(arg, "--text") == 0) { as_text = 1; continue; } + if (strcmp(arg, "--realmode") == 0) { + use_real_mode = 1; + continue; + } } else if (!fname) { fname = arg; @@ -657,6 +782,7 @@ int main(int argc, char **argv) if (!fname) { usage(); } + regex_init(use_real_mode); fp = fopen(fname, "r"); if (!fp) { die("Cannot open %s: %s\n", @@ -675,6 +801,6 @@ int main(int argc, char **argv) print_absolute_relocs(); return 0; } - emit_relocs(as_text); + emit_relocs(as_text, use_real_mode); return 0; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a8f8844b8d3..95dccce8e97 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -63,6 +63,7 @@ #include <asm/stackprotector.h> #include <asm/hypervisor.h> #include <asm/mwait.h> +#include <asm/pci_x86.h> #ifdef CONFIG_ACPI #include <linux/acpi.h> @@ -809,9 +810,40 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC +static unsigned long xen_set_apic_id(unsigned int x) +{ + WARN_ON(1); + return x; +} +static unsigned int xen_get_apic_id(unsigned long x) +{ + return ((x)>>24) & 0xFFu; +} static u32 xen_apic_read(u32 reg) { - return 0; + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.pcpu_info.xen_cpuid = 0, + }; + int ret = 0; + + /* Shouldn't need this as APIC is turned off for PV, and we only + * get called on the bootup processor. But just in case. */ + if (!xen_initial_domain() || smp_processor_id()) + return 0; + + if (reg == APIC_LVR) + return 0x10; + + if (reg != APIC_ID) + return 0; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return 0; + + return op.u.pcpu_info.apic_id << 24; } static void xen_apic_write(u32 reg, u32 val) @@ -849,6 +881,8 @@ static void set_xen_basic_apic_ops(void) apic->icr_write = xen_apic_icr_write; apic->wait_icr_idle = xen_apic_wait_icr_idle; apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; + apic->set_apic_id = xen_set_apic_id; + apic->get_apic_id = xen_get_apic_id; } #endif @@ -1365,8 +1399,10 @@ asmlinkage void __init xen_start_kernel(void) /* Make sure ACS will be enabled */ pci_request_acs(); } - - +#ifdef CONFIG_PCI + /* PCI BIOS service won't work from a PV guest. */ + pci_probe &= ~PCI_PROBE_BIOS; +#endif xen_raw_console_write("about to get started...\n"); xen_setup_runstate_info(0); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b8e279479a6..69f5857660a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -353,8 +353,13 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) { if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long pfn = mfn_to_pfn(mfn); + pteval_t flags = val & PTE_FLAGS_MASK; - val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; + if (unlikely(pfn == ~0)) + val = flags & ~_PAGE_PRESENT; + else + val = ((pteval_t)pfn << PAGE_SHIFT) | flags; } return val; |