diff options
Diffstat (limited to 'arch/x86_64')
42 files changed, 744 insertions, 542 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 010d2265f1c..d4275537b25 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -96,6 +96,19 @@ config AUDIT_ARCH bool default y +config GENERIC_BUG + bool + default y + depends on BUG + +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + source "init/Kconfig" @@ -122,7 +135,7 @@ endchoice choice prompt "Processor family" - default MK8 + default GENERIC_CPU config MK8 bool "AMD-Opteron/Athlon64" @@ -130,16 +143,31 @@ config MK8 Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs. config MPSC - bool "Intel EM64T" + bool "Intel P4 / older Netburst based Xeon" help - Optimize for Intel Pentium 4 and Xeon CPUs with Intel - Extended Memory 64 Technology(EM64T). For details see + Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs + with Intel Extended Memory 64 Technology(EM64T). For details see <http://www.intel.com/technology/64bitextensions/>. + Note the the latest Xeons (Xeon 51xx and 53xx) are not based on the + Netburst core and shouldn't use this option. You can distingush them + using the cpu family field + in /proc/cpuinfo. Family 15 is a older Xeon, Family 6 a newer one + (this rule only applies to system that support EM64T) + +config MCORE2 + bool "Intel Core2 / newer Xeon" + help + Optimize for Intel Core2 and newer Xeons (51xx) + You can distingush the newer Xeons from the older ones using + the cpu family field in /proc/cpuinfo. 15 is a older Xeon + (use CONFIG_MPSC then), 6 is a newer one. This rule only + applies to CPUs that support EM64T. config GENERIC_CPU bool "Generic-x86-64" help Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. endchoice @@ -149,12 +177,12 @@ endchoice config X86_L1_CACHE_BYTES int default "128" if GENERIC_CPU || MPSC - default "64" if MK8 + default "64" if MK8 || MCORE2 config X86_L1_CACHE_SHIFT int default "7" if GENERIC_CPU || MPSC - default "6" if MK8 + default "6" if MK8 || MCORE2 config X86_INTERNODE_CACHE_BYTES int @@ -344,11 +372,6 @@ config ARCH_DISCONTIGMEM_ENABLE depends on NUMA default y - -config ARCH_DISCONTIGMEM_ENABLE - def_bool y - depends on NUMA - config ARCH_DISCONTIGMEM_DEFAULT def_bool y depends on NUMA @@ -455,6 +478,17 @@ config CALGARY_IOMMU Normally the kernel will make the right choice by itself. If unsure, say Y. +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool @@ -550,7 +584,7 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. config CC_STACKPROTECTOR - bool "Enable -fstack-protector buffer overflow detection (EXPRIMENTAL)" + bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" depends on EXPERIMENTAL help This option turns on the -fstack-protector GCC feature. This diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 6e38d4daeed..2941a915d4e 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -30,6 +30,10 @@ cflags-y := cflags-kernel-y := cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) +# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it +# will eventually. Use -mtune=generic as fallback +cflags-$(CONFIG_MCORE2) += \ + $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) cflags-y += -m64 @@ -41,9 +45,7 @@ cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections # actually it makes the kernel smaller too. cflags-y += -fno-reorder-blocks cflags-y += -Wno-sign-compare -ifneq ($(CONFIG_UNWIND_INFO),y) cflags-y += -fno-asynchronous-unwind-tables -endif ifneq ($(CONFIG_DEBUG_INFO),y) # -fweb shrinks the kernel a bit, but the difference is very small # it also messes up debugging, so don't use it for now. diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 0f5d44e86be..69584c29530 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.19-rc2-git4 -# Sat Oct 21 03:38:52 2006 +# Linux kernel version: 2.6.20-rc3 +# Fri Jan 5 11:54:41 2007 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -22,6 +22,9 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_DMI=y CONFIG_AUDIT_ARCH=y +CONFIG_GENERIC_BUG=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -47,13 +50,14 @@ CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y -# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -87,9 +91,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_BLOCK=y -CONFIG_LBD=y # CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set # # IO Schedulers @@ -111,10 +113,11 @@ CONFIG_X86_PC=y # CONFIG_X86_VSMP is not set # CONFIG_MK8 is not set # CONFIG_MPSC is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_INTERNODE_CACHE_BYTES=128 +CONFIG_MCORE2=y +# CONFIG_GENERIC_CPU is not set +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 CONFIG_X86_TSC=y CONFIG_X86_GOOD_APIC=y # CONFIG_MICROCODE is not set @@ -170,6 +173,7 @@ CONFIG_SECCOMP=y # CONFIG_CC_STACKPROTECTOR is not set # CONFIG_HZ_100 is not set CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 # CONFIG_REORDER is not set @@ -322,6 +326,7 @@ CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -513,6 +518,7 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +# CONFIG_SCSI_TGT is not set CONFIG_SCSI_NETLINK=y # CONFIG_SCSI_PROC_FS is not set @@ -533,6 +539,7 @@ CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_MULTI_LUN is not set CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set # # SCSI Transports @@ -586,6 +593,7 @@ CONFIG_MEGARAID_SAS=y # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set # # Serial ATA (prod) and Parallel ATA (experimental) drivers @@ -624,6 +632,7 @@ CONFIG_SATA_INTEL_COMBINED=y # CONFIG_PATA_IT821X is not set # CONFIG_PATA_JMICRON is not set # CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set @@ -795,6 +804,7 @@ CONFIG_BNX2=y CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set # CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set # # Token Ring devices @@ -927,10 +937,6 @@ CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set - -# -# Ftape, the floppy tape device driver -# CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -1050,6 +1056,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set # CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_SMSC47M1 is not set # CONFIG_SENSORS_SMSC47M192 is not set @@ -1060,6 +1067,7 @@ CONFIG_SENSORS_SMSC47B397=m # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83791D is not set # CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set # CONFIG_SENSORS_W83627EHF is not set @@ -1107,10 +1115,7 @@ CONFIG_SOUND=y # Open Sound System # CONFIG_SOUND_PRIME=y -CONFIG_OSS_OBSOLETE_DRIVER=y # CONFIG_SOUND_BT878 is not set -# CONFIG_SOUND_EMU10K1 is not set -# CONFIG_SOUND_FUSION is not set # CONFIG_SOUND_ES1371 is not set CONFIG_SOUND_ICH=y # CONFIG_SOUND_TRIDENT is not set @@ -1120,6 +1125,11 @@ CONFIG_SOUND_ICH=y # CONFIG_SOUND_OSS is not set # +# HID Devices +# +CONFIG_HID=y + +# # USB support # CONFIG_USB_ARCH_HAS_HCD=y @@ -1135,6 +1145,7 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_BANDWIDTH is not set # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set +# CONFIG_USB_MULTITHREAD_PROBE is not set # CONFIG_USB_OTG is not set # @@ -1182,7 +1193,6 @@ CONFIG_USB_STORAGE=y # USB Input Devices # CONFIG_USB_HID=y -CONFIG_USB_HIDINPUT=y # CONFIG_USB_HIDINPUT_POWERBOOK is not set # CONFIG_HID_FF is not set # CONFIG_USB_HIDDEV is not set @@ -1212,6 +1222,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_KAWETH is not set # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set CONFIG_USB_MON=y @@ -1302,6 +1313,11 @@ CONFIG_USB_MON=y # # +# Virtualization +# +# CONFIG_KVM is not set + +# # Firmware Drivers # # CONFIG_EDD is not set @@ -1474,6 +1490,11 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y # +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# # Instrumentation Support # CONFIG_PROFILING=y @@ -1488,6 +1509,8 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_DETECT_SOFTLOCKUP=y @@ -1503,15 +1526,12 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set -CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set -CONFIG_UNWIND_INFO=y -CONFIG_STACK_UNWIND=y # CONFIG_FORCED_INLINING is not set -# CONFIG_HEADERS_CHECK is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_DEBUG_RODATA is not set @@ -1533,9 +1553,11 @@ CONFIG_DEBUG_STACKOVERFLOW=y # # Library routines # +CONFIG_BITREVERSE=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_PLIST=y +CONFIG_IOMAP_COPY=y diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 396d3c10001..fe83edb93c1 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c @@ -241,7 +241,7 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) get_user(c,p++); } while (c); } - put_user(NULL,argv); + put_user(0, argv); current->mm->arg_end = current->mm->env_start = (unsigned long) p; while (envc-->0) { char c; @@ -250,7 +250,7 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) get_user(c,p++); } while (c); } - put_user(NULL,envp); + put_user(0, envp); current->mm->env_end = (unsigned long) p; return sp; } @@ -272,7 +272,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -357,7 +357,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) { printk(KERN_WARNING "fd_offset is not page aligned. Please convert program: %s\n", - bprm->file->f_dentry->d_name.name); + bprm->file->f_path.dentry->d_name.name); error_time = jiffies; } #endif @@ -440,7 +440,7 @@ static int load_aout_library(struct file *file) int retval; struct exec ex; - inode = file->f_dentry->d_inode; + inode = file->f_path.dentry->d_inode; retval = -ENOEXEC; error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); @@ -471,7 +471,7 @@ static int load_aout_library(struct file *file) { printk(KERN_WARNING "N_TXTOFF is not page aligned. Please convert library: %s\n", - file->f_dentry->d_name.name); + file->f_path.dentry->d_name.name); error_time = jiffies; } #endif diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 82ef182de6a..543ef4f405e 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -305,8 +305,6 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); #undef MODULE_DESCRIPTION #undef MODULE_AUTHOR -#define elf_addr_t __u32 - static void elf32_init(struct pt_regs *); #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 @@ -351,7 +349,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index 0e0a266d976..ff499ef2a1b 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -584,6 +584,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->rdx = (unsigned long) &frame->info; regs->rcx = (unsigned long) &frame->uc; + /* Make -mregparm=3 work */ + regs->rax = sig; + regs->rdx = (unsigned long) &frame->info; + regs->rcx = (unsigned long) &frame->uc; + asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 3a01329473a..3e5ed20cba4 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -49,7 +49,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) return -ENOMEM; diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 4d9d5ed942b..124b2d27b4a 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include <linux/kernel_stat.h> #include <linux/sysdev.h> #include <linux/module.h> +#include <linux/ioport.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; +static struct resource *ioapic_resources; +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + /* * cpu_mask that denotes the CPUs that needs timer interrupt coming in as * IPIs in place of local APIC timers @@ -133,7 +140,6 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - v = GET_APIC_VERSION(apic_read(APIC_LVR)); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); } @@ -452,23 +458,30 @@ static struct { static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); @@ -479,10 +492,13 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; @@ -496,8 +512,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); @@ -585,6 +605,64 @@ static int __init detect_init_APIC (void) return 0; } +#ifdef CONFIG_X86_IO_APIC +static struct resource * __init ioapic_setup_resources(void) +{ +#define IOAPIC_RESOURCE_NAME_SIZE 11 + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk("IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif + void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -604,6 +682,11 @@ void __init init_apic_mappings(void) apic_mapped = 1; apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). @@ -613,7 +696,9 @@ void __init init_apic_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; @@ -625,6 +710,12 @@ void __init init_apic_mappings(void) apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } } } } @@ -644,10 +735,9 @@ void __init init_apic_mappings(void) static void __setup_APIC_LVTT(unsigned int clocks) { - unsigned int lvtt_value, tmp_value, ver; + unsigned int lvtt_value, tmp_value; int cpu = smp_processor_id(); - ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) diff --git a/arch/x86_64/kernel/cpufreq/Kconfig b/arch/x86_64/kernel/cpufreq/Kconfig index 81f1562e539..45a6a1fd14a 100644 --- a/arch/x86_64/kernel/cpufreq/Kconfig +++ b/arch/x86_64/kernel/cpufreq/Kconfig @@ -27,10 +27,13 @@ config X86_POWERNOW_K8_ACPI default y config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep" + tristate "Intel Enhanced SpeedStep (deprecated)" select CPU_FREQ_TABLE depends on ACPI_PROCESSOR help + This is deprecated and this functionality is now merged into + acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of + speedstep_centrino. This adds the CPUFreq driver for Enhanced SpeedStep enabled mobile CPUs. This means Intel Pentium M (Centrino) CPUs or 64bit enabled Intel Xeons. @@ -46,10 +49,12 @@ config X86_SPEEDSTEP_CENTRINO_ACPI config X86_ACPI_CPUFREQ tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE depends on ACPI_PROCESSOR help This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. + This driver also supports Intel Enhanced Speedstep. For details, take a look at <file:Documentation/cpu-freq/>. diff --git a/arch/x86_64/kernel/cpufreq/Makefile b/arch/x86_64/kernel/cpufreq/Makefile index d8b59387922..753ce1dd418 100644 --- a/arch/x86_64/kernel/cpufreq/Makefile +++ b/arch/x86_64/kernel/cpufreq/Makefile @@ -5,8 +5,8 @@ SRCDIR := ../../../i386/kernel/cpu/cpufreq obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o -obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 3525f884af8..95a7a2c1313 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -28,71 +28,6 @@ /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, - void *data, size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that, no need to invent something new. - */ - - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - - if (!buf) - return; - - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - -static void crash_save_self(struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - crash_save_this_cpu(regs, cpu); -} - #ifdef CONFIG_SMP static atomic_t waiting_for_crash_ipi; @@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self, return NOTIFY_STOP; local_irq_disable(); - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs) disable_IO_APIC(); - crash_save_self(regs); + crash_save_cpu(regs, smp_processor_id()); } diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 68273bff58c..49802f1bee9 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -76,6 +76,18 @@ static void ati_bugs(void) } } +static void intel_bugs(void) +{ + u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); + +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + struct chipset { u16 vendor; void (*f)(void); @@ -85,6 +97,7 @@ static struct chipset early_qrk[] = { { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, { PCI_VENDOR_ID_VIA, via_bugs }, { PCI_VENDOR_ID_ATI, ati_bugs }, + { PCI_VENDOR_ID_INTEL, intel_bugs}, {} }; diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7d401b00d82..9f5dac64aa8 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -230,7 +230,6 @@ ENTRY(system_call) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) - CFI_REMEMBER_STATE jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -241,7 +240,6 @@ ENTRY(system_call) * Syscall return path ending with SYSRET (fast path) * Has incomplete stack frame and undefined top of stack. */ - .globl ret_from_sys_call ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ @@ -251,8 +249,8 @@ sysret_check: TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx - CFI_REMEMBER_STATE jnz sysret_careful + CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: */ @@ -265,10 +263,10 @@ sysret_check: swapgs sysretq + CFI_RESTORE_STATE /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc sysret_signal TRACE_IRQS_ON @@ -306,7 +304,6 @@ badsys: /* Do syscall tracing */ tracesys: - CFI_RESTORE_STATE SAVE_REST movq $-ENOSYS,RAX(%rsp) FIXUP_TOP_OF_STACK %rdi @@ -322,32 +319,13 @@ tracesys: call *sys_call_table(,%rax,8) 1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ - jmp int_ret_from_sys_call - CFI_ENDPROC -END(system_call) /* * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. - */ -ENTRY(int_ret_from_sys_call) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-ARGOFFSET - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ - CFI_REL_OFFSET rsp,RSP-ARGOFFSET - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ - CFI_REL_OFFSET rip,RIP-ARGOFFSET - CFI_REL_OFFSET rdx,RDX-ARGOFFSET - CFI_REL_OFFSET rcx,RCX-ARGOFFSET - CFI_REL_OFFSET rax,RAX-ARGOFFSET - CFI_REL_OFFSET rdi,RDI-ARGOFFSET - CFI_REL_OFFSET rsi,RSI-ARGOFFSET - CFI_REL_OFFSET r8,R8-ARGOFFSET - CFI_REL_OFFSET r9,R9-ARGOFFSET - CFI_REL_OFFSET r10,R10-ARGOFFSET - CFI_REL_OFFSET r11,R11-ARGOFFSET + */ + .globl int_ret_from_sys_call +int_ret_from_sys_call: cli TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) @@ -394,8 +372,6 @@ int_very_careful: popq %rdi CFI_ADJUST_CFA_OFFSET -8 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi - cli - TRACE_IRQS_OFF jmp int_restore_rest int_signal: @@ -411,7 +387,7 @@ int_restore_rest: TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC -END(int_ret_from_sys_call) +END(system_call) /* * Certain special system calls that need to save a complete full stack frame. @@ -1179,36 +1155,3 @@ ENTRY(call_softirq) ret CFI_ENDPROC ENDPROC(call_softirq) - -#ifdef CONFIG_STACK_UNWIND -ENTRY(arch_unwind_init_running) - CFI_STARTPROC - movq %r15, R15(%rdi) - movq %r14, R14(%rdi) - xchgq %rsi, %rdx - movq %r13, R13(%rdi) - movq %r12, R12(%rdi) - xorl %eax, %eax - movq %rbp, RBP(%rdi) - movq %rbx, RBX(%rdi) - movq (%rsp), %rcx - movq %rax, R11(%rdi) - movq %rax, R10(%rdi) - movq %rax, R9(%rdi) - movq %rax, R8(%rdi) - movq %rax, RAX(%rdi) - movq %rax, RCX(%rdi) - movq %rax, RDX(%rdi) - movq %rax, RSI(%rdi) - movq %rax, RDI(%rdi) - movq %rax, ORIG_RAX(%rdi) - movq %rcx, RIP(%rdi) - leaq 8(%rsp), %rcx - movq $__KERNEL_CS, CS(%rdi) - movq %rax, EFLAGS(%rdi) - movq %rcx, RSP(%rdi) - movq $__KERNEL_DS, SS(%rdi) - jmpq *%rdx - CFI_ENDPROC -ENDPROC(arch_unwind_init_running) -#endif diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 8e78a75d186..b007433f96b 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -33,7 +33,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; struct genapic *genapic = &apic_flat; - +struct genapic *genapic_force; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. @@ -46,6 +46,13 @@ void __init clustered_apic_check(void) u8 cluster_cnt[NUM_APIC_CLUSTERS]; int max_apic = 0; + /* genapic selection can be forced because of certain quirks. + */ + if (genapic_force) { + genapic = genapic_force; + goto print; + } + #if defined(CONFIG_ACPI) /* * Some x86_64 machines use physical APIC mode regardless of how many diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 9561eb3c5b5..cc230b93cd1 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data) { int i; - for (i = 0; i < 256; i++) + /* clear bss before set_intr_gate with early_idt_handler */ + clear_bss(); + + for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); asm volatile("lidt %0" :: "m" (idt_descr)); - clear_bss(); early_printk("Kernel alive\n"); diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index 3aa1e9bb781..1d58c13bc6b 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf) struct task_struct *tsk = current; int err = 0; - { - extern void bad_user_i387_struct(void); - if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave)) - bad_user_i387_struct(); - } + BUILD_BUG_ON(sizeof(struct user_i387_struct) != + sizeof(tsk->thread.i387.fxsave)); if ((unsigned long)buf % 16) printk("save_i387: bad fpstate %p\n",buf); diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index c4ef801b765..d73c79e821f 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -void (*interrupt[NR_IRQS])(void) = { +/* for the irq vectors */ +static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c80081a6ba4..d7bad90a5ad 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -178,14 +178,20 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { - unsigned long flags; union entry_union eu; eu.entry = e; - spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2*pin, eu.w2); io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -750,6 +756,22 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) return vector; } +static void __clear_irq_vector(int irq) +{ + cpumask_t mask; + int cpu, vector; + + BUG_ON(!irq_vector[irq]); + + vector = irq_vector[irq]; + cpus_and(mask, irq_domain[irq], cpu_online_map); + for_each_cpu_mask(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + irq_vector[irq] = 0; + irq_domain[irq] = CPU_MASK_NONE; +} + void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ @@ -794,27 +816,65 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) handle_edge_irq, "edge"); } } - -static void __init setup_IO_APIC_irqs(void) +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + int vector; unsigned long flags; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); + + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + } + + if (!apic && !IO_APIC_IRQ(irq)) + return; + + if (IO_APIC_IRQ(irq)) { + cpumask_t mask; + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); + if (vector < 0) + return; + + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.vector = vector; + + ioapic_register_intr(irq, vector, IOAPIC_AUTO); + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + + ioapic_write_entry(apic, pin, entry); + + spin_lock_irqsave(&ioapic_lock, flags); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -826,39 +886,11 @@ static void __init setup_IO_APIC_irqs(void) continue; } - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - } - irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); - if (!apic && !IO_APIC_IRQ(irq)) - continue; - - if (IO_APIC_IRQ(irq)) { - cpumask_t mask; - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); - if (vector < 0) - continue; - - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); - entry.vector = vector; + setup_IO_APIC_irq(apic, pin, idx, irq); - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); - - spin_lock_irqsave(&ioapic_lock, flags); - set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1837,7 +1869,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - irq_vector[irq] = 0; + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } @@ -2139,7 +2171,15 @@ void __init setup_ioapic_dest(void) if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + if(!irq_vector[irq]) + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); } } diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index e46c55856d4..0c06af6c13b 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) if (likely(irq < NR_IRQS)) generic_handle_irq(irq); - else + else if (printk_ratelimit()) printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index ac241567e68..209c8c0bec7 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index bbea88801d8..ac085038af2 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status) */ static int check_interval = 5 * 60; /* 5 minutes */ -static void mcheck_timer(void *data); -static DECLARE_WORK(mcheck_work, mcheck_timer, NULL); +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); static void mcheck_check_cpu(void *info) { @@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info) do_machine_check(NULL, 0); } -static void mcheck_timer(void *data) +static void mcheck_timer(struct work_struct *work) { on_each_cpu(mcheck_check_cpu, NULL, 1, 1); schedule_delayed_work(&mcheck_work, check_interval * HZ); @@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu) return err; } -#ifdef CONFIG_HOTPLUG_CPU static void mce_remove_device(unsigned int cpu) { int i; @@ -652,6 +651,7 @@ static void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); sysdev_unregister(&per_cpu(device_mce,cpu)); + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -674,7 +674,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) static struct notifier_block mce_cpu_notifier = { .notifier_call = mce_cpu_callback, }; -#endif static __init int mce_init_device(void) { diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index 883fe747f64..fa09debad4b 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -551,7 +551,6 @@ out: return err; } -#ifdef CONFIG_HOTPLUG_CPU /* * let's be hotplug friendly. * in case of multiple core processors, the first core always takes ownership @@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank) sprintf(name, "threshold_bank%i", bank); +#ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; } +#endif /* remove all sibling symlinks before unregistering */ for_each_cpu_mask(i, b->cpus) { @@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb, static struct notifier_block threshold_cpu_notifier = { .notifier_call = threshold_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ static __init int threshold_init_device(void) { diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c index 9d0958ff547..a888e67f587 100644 --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c @@ -23,6 +23,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/bug.h> #include <asm/system.h> #include <asm/page.h> @@ -173,10 +174,12 @@ int module_finalize(const Elf_Ehdr *hdr, lseg, lseg + locks->sh_size, tseg, tseg + text->sh_size); } - return 0; + + return module_bug_finalize(hdr, sechdrs, me); } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); + module_bug_cleanup(mod); } diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b147ab19fbd..08072568847 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -35,8 +35,6 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; -int acpi_found_madt; - /* * Various Linux-internal data structures created from the * MP-table. diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d9..186aebbae32 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -12,14 +12,15 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include <linux/nmi.h> #include <linux/mm.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/sysdev.h> -#include <linux/nmi.h> #include <linux/sysctl.h> #include <linux/kprobes.h> +#include <linux/cpumask.h> #include <asm/smp.h> #include <asm/nmi.h> @@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -190,6 +193,8 @@ void nmi_watchdog_default(void) nmi_watchdog = NMI_IO_APIC; } +static int endflag __initdata = 0; + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -197,7 +202,6 @@ void nmi_watchdog_default(void) */ static __init void nmi_cpu_busy(void *data) { - volatile int *endflag = data; local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, @@ -205,14 +209,13 @@ static __init void nmi_cpu_busy(void *data) pause instruction. On a real HT machine this is fine because all other CPUs are busy with "useless" delay loops and don't care if they get somewhat less cycles. */ - while (*endflag == 0) - barrier(); + while (endflag == 0) + mb(); } #endif int __init check_nmi_watchdog (void) { - volatile int endflag = 0; int *counts; int cpu; @@ -253,6 +256,7 @@ int __init check_nmi_watchdog (void) if (!atomic_read(&nmi_active)) { kfree(counts); atomic_set(&nmi_active, -1); + endflag = 1; return -1; } endflag = 1; @@ -782,6 +786,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = smp_processor_id(); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 dummy; int rc=0; @@ -799,6 +804,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + #ifdef CONFIG_X86_MCE /* Could check oops_in_progress here too, but it's safer not too */ @@ -931,6 +946,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 37a770859e7..3d65b1d4c2b 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -41,6 +41,13 @@ #include <asm/pci-direct.h> #include <asm/system.h> #include <asm/dma.h> +#include <asm/rio.h> + +#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT +int use_calgary __read_mostly = 1; +#else +int use_calgary __read_mostly = 0; +#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ @@ -115,14 +122,35 @@ static const unsigned long phb_offsets[] = { 0xB000 /* PHB3 */ }; +/* PHB debug registers */ + +static const unsigned long phb_debug_offsets[] = { + 0x4000 /* PHB 0 DEBUG */, + 0x5000 /* PHB 1 DEBUG */, + 0x6000 /* PHB 2 DEBUG */, + 0x7000 /* PHB 3 DEBUG */ +}; + +/* + * STUFF register for each debug PHB, + * byte 1 = start bus number, byte 2 = end bus number + */ + +#define PHB_DEBUG_STUFF_OFFSET 0x0020 + unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; +static struct rio_table_hdr *rio_table_hdr __initdata; +static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; + struct calgary_bus_info { void *tce_space; unsigned char translation_disabled; signed char phbid; + void __iomem *bbar; }; static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; @@ -475,6 +503,11 @@ static struct dma_mapping_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, }; +static inline void __iomem * busno_to_bbar(unsigned char num) +{ + return bus_info[num].bbar; +} + static inline int busno_to_phbid(unsigned char num) { return bus_info[num].phbid; @@ -620,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev) static void __init calgary_reserve_regions(struct pci_dev *dev) { unsigned int npages; - void __iomem *bbar; - unsigned char busnum; u64 start; struct iommu_table *tbl = dev->sysdata; - bbar = tbl->bbar; - busnum = dev->bus->number; - /* reserve bad_dma_address in case it's a legal address */ iommu_range_reserve(tbl, bad_dma_address, 1); @@ -740,7 +768,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar, { u64 val64; void __iomem *target; - unsigned long phb_shift = -1; + unsigned int phb_shift = ~0; /* silence gcc */ u64 mask; switch (busno_to_phbid(busnum)) { @@ -828,33 +856,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline unsigned int __init locate_register_space(struct pci_dev *dev) -{ - int rionodeid; - u32 address; - - /* - * Each Calgary has four busses. The first four busses (first Calgary) - * have RIO node ID 2, then the next four (second Calgary) have RIO - * node ID 3, the next four (third Calgary) have node ID 2 again, etc. - * We use a gross hack - relying on the dev->bus->number ordering, - * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1, - * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the - * second (id 3), and then it repeats modulo 14. - */ - rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2; - /* - * register space address calculation as follows: - * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase) - * ChassisBase is always zero for x366/x260/x460 - * RioNodeId is 2 for first Calgary, 3 for second Calgary - */ - address = START_ADDRESS - - (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) + - (0x100000) * (rionodeid - CHASSIS_BASE); - return address; -} - static void __init calgary_init_one_nontraslated(struct pci_dev *dev) { pci_dev_get(dev); @@ -864,23 +865,15 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev) static int __init calgary_init_one(struct pci_dev *dev) { - u32 address; void __iomem *bbar; int ret; BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - address = locate_register_space(dev); - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(address, 1024 * 1024); - if (!bbar) { - ret = -ENODATA; - goto done; - } - + bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) - goto iounmap; + goto done; pci_dev_get(dev); dev->bus->self = dev; @@ -888,17 +881,66 @@ static int __init calgary_init_one(struct pci_dev *dev) return 0; -iounmap: - iounmap(bbar); done: return ret; } +static int __init calgary_locate_bbars(void) +{ + int ret; + int rioidx, phb, bus; + void __iomem *bbar; + void __iomem *target; + unsigned long offset; + u8 start_bus, end_bus; + u32 val; + + ret = -ENODATA; + for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { + struct rio_detail *rio = rio_devs[rioidx]; + + if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) + continue; + + /* map entire 1MB of Calgary config space */ + bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); + if (!bbar) + goto error; + + for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; + target = calgary_reg(bbar, offset); + + val = be32_to_cpu(readl(target)); + start_bus = (u8)((val & 0x00FF0000) >> 16); + end_bus = (u8)((val & 0x0000FF00) >> 8); + for (bus = start_bus; bus <= end_bus; bus++) { + bus_info[bus].bbar = bbar; + bus_info[bus].phbid = phb; + } + } + } + + return 0; + +error: + /* scan bus_info and iounmap any bbars we previously ioremap'd */ + for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) + if (bus_info[bus].bbar) + iounmap(bus_info[bus].bbar); + + return ret; +} + static int __init calgary_init(void) { - int ret = -ENODEV; + int ret; struct pci_dev *dev = NULL; + ret = calgary_locate_bbars(); + if (ret) + return ret; + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, @@ -921,7 +963,7 @@ static int __init calgary_init(void) error: do { - dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM, + dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CALGARY, dev); if (!dev) @@ -962,13 +1004,56 @@ static inline int __init determine_tce_table_size(u64 ram) return ret; } +static int __init build_detail_arrays(void) +{ + unsigned long ptr; + int i, scal_detail_size, rio_detail_size; + + if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + printk(KERN_WARNING + "Calgary: MAX_NUMNODES too low! Defined as %d, " + "but system has %d nodes.\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); + return -ENODEV; + } + + switch (rio_table_hdr->version){ + case 2: + scal_detail_size = 11; + rio_detail_size = 13; + break; + case 3: + scal_detail_size = 12; + rio_detail_size = 15; + break; + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -EPROTO; + } + + ptr = ((unsigned long)rio_table_hdr) + 3; + for (i = 0; i < rio_table_hdr->num_scal_dev; + i++, ptr += scal_detail_size) + scal_devs[i] = (struct scal_detail *)ptr; + + for (i = 0; i < rio_table_hdr->num_rio_dev; + i++, ptr += rio_detail_size) + rio_devs[i] = (struct rio_detail *)ptr; + + return 0; +} + void __init detect_calgary(void) { u32 val; int bus; void *tbl; int calgary_found = 0; - int phb = -1; + unsigned long ptr; + unsigned int offset, prev_offset; + int ret; /* * if the user specified iommu=off or iommu=soft or we found @@ -977,25 +1062,54 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!use_calgary) + return; + if (!early_pci_allowed()) return; + printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); + + ptr = (unsigned long)phys_to_virt(get_bios_ebda()); + + rio_table_hdr = NULL; + prev_offset = 0; + offset = 0x180; + /* + * The next offset is stored in the 1st word. + * Only parse up until the offset increases: + */ + while (offset > prev_offset) { + /* The block id is stored in the 2nd word */ + if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ + /* set the pointer past the offset & block id */ + rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); + break; + } + prev_offset = offset; + offset = *((unsigned short *)(ptr + offset)); + } + if (!rio_table_hdr) { + printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " + "in EBDA - bailing!\n"); + return; + } + + ret = build_detail_arrays(); + if (ret) { + printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); + return; + } + specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { int dev; struct calgary_bus_info *info = &bus_info[bus]; - info->phbid = -1; if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; - /* - * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3) - * it is connected to releative to the clagary chip. - */ - phb = (phb + 1) % PHBS_PER_CALGARY; - if (info->translation_disabled) continue; @@ -1010,13 +1124,15 @@ void __init detect_calgary(void) if (!tbl) goto cleanup; info->tce_space = tbl; - info->phbid = phb; calgary_found = 1; break; } } } + printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n", + calgary_found ? "found" : "not found"); + if (calgary_found) { iommu_detected = 1; calgary_detected = 1; diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index f8d857453f8..683b7a5c1ab 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -296,6 +296,11 @@ __init int iommu_setup(char *p) gart_parse_options(p); #endif +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + p += strcspn(p, ","); if (*p == ',') ++p; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 16261a8a330..fc1960f1f24 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -601,10 +601,9 @@ void __init gart_iommu_init(void) (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " - "but IOMMU not available.\n" + "but GART IOMMU not available.\n" KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } return; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7451a4c43c1..cbbc6adc1a9 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -108,17 +108,19 @@ void exit_idle(void) */ static void default_idle(void) { - local_irq_enable(); - current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); - } + /* + * TS_POLLING-cleared state must be visible before we + * test NEED_RESCHED: + */ + smp_mb(); + local_irq_disable(); + if (!need_resched()) { + /* Enables interrupts one instruction before HLT. + x86 special cases this so there is no race. */ + safe_halt(); + } else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } @@ -130,15 +132,7 @@ static void default_idle(void) static void poll_idle (void) { local_irq_enable(); - - asm volatile( - "2:" - "testl %0,%1;" - "rep; nop;" - "je 2b;" - : : - "i" (_TIF_NEED_RESCHED), - "m" (current_thread_info()->flags)); + cpu_relax(); } void cpu_idle_wait(void) @@ -219,6 +213,12 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_irq_disable(); enter_idle(); idle(); /* In many cases the interrupt that ended idle @@ -256,9 +256,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - local_irq_enable(); - while (!need_resched()) - mwait_idle_with_hints(0,0); + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else { + local_irq_enable(); + } } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4..af425a8049f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -471,8 +471,7 @@ void __init setup_arch(char **cmdline_p) if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start+INITRD_SIZE; } else { @@ -732,11 +731,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* Fix cpuid4 emulation for more */ num_cache_leaves = 3; - /* When there is only one core no need to synchronize RDTSC */ - if (num_possible_cpus() == 1) - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); - else - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -835,6 +831,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); } + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); @@ -854,7 +859,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + if (c->x86 == 15) + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + else + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9f74c883568..af1ec4d23cf 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -379,12 +379,17 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, put_cpu(); return 0; } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + spin_lock_bh(&call_lock); __smp_call_function_single(cpu, func, info, nonatomic, wait); spin_unlock_bh(&call_lock); put_cpu(); return 0; } +EXPORT_SYMBOL(smp_call_function_single); /* * this function sends a 'generic call function' IPI to all other CPUs diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 62c2e747af5..daf19332f0d 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -60,6 +60,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/numa.h> +#include <asm/genapic.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -753,14 +754,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta } struct create_idle { + struct work_struct work; struct task_struct *idle; struct completion done; int cpu; }; -void do_fork_idle(void *_c_idle) +void do_fork_idle(struct work_struct *work) { - struct create_idle *c_idle = _c_idle; + struct create_idle *c_idle = + container_of(work, struct create_idle, work); c_idle->idle = fork_idle(c_idle->cpu); complete(&c_idle->done); @@ -775,10 +778,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) int timeout; unsigned long start_rip; struct create_idle c_idle = { + .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), .cpu = cpu, .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - DECLARE_WORK(work, do_fork_idle, &c_idle); /* allocate memory for gdts of secondary cpus. Hotplug is considered */ if (!cpu_gdt_descr[cpu].address && @@ -825,9 +828,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) * thread. */ if (!keventd_up() || current_is_keventd()) - work.func(work.data); + c_idle.work.func(&c_idle.work); else { - schedule_work(&work); + schedule_work(&c_idle.work); wait_for_completion(&c_idle.done); } @@ -1167,6 +1170,13 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + + if (num_online_cpus() > 8 && genapic == &apic_flat) { + printk(KERN_WARNING + "flat APIC routing can't be used with > 8 cpus\n"); + BUG(); + } + err = 0; return err; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index e3ef544d2cf..5cc76d0d331 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0; static unsigned int cpufreq_init = 0; static struct work_struct cpufreq_delayed_get_work; -static void handle_cpufreq_delayed_get(void *v) +static void handle_cpufreq_delayed_get(struct work_struct *v) { unsigned int cpu; for_each_online_cpu(cpu) { @@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER)) cpufreq_init = 1; @@ -656,6 +656,25 @@ core_initcall(cpufreq_tsc); */ #define TICK_COUNT 100000000 +#define TICK_MIN 5000 + +/* + * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none + * occurs between the reads of the hpet & TSC. + */ +static void __init read_hpet_tsc(int *hpet, int *tsc) +{ + int tsc1, tsc2, hpet1; + + do { + tsc1 = get_cycles_sync(); + hpet1 = hpet_readl(HPET_COUNTER); + tsc2 = get_cycles_sync(); + } while (tsc2 - tsc1 > TICK_MIN); + *hpet = hpet1; + *tsc = tsc2; +} + static unsigned int __init hpet_calibrate_tsc(void) { @@ -666,13 +685,11 @@ static unsigned int __init hpet_calibrate_tsc(void) local_irq_save(flags); local_irq_disable(); - hpet_start = hpet_readl(HPET_COUNTER); - rdtscl(tsc_start); + read_hpet_tsc(&hpet_start, &tsc_start); do { local_irq_disable(); - hpet_now = hpet_readl(HPET_COUNTER); - tsc_now = get_cycles_sync(); + read_hpet_tsc(&hpet_now, &tsc_now); local_irq_restore(flags); } while ((tsc_now - tsc_start) < TICK_COUNT && (hpet_now - hpet_start) < TICK_COUNT); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 0d65b22f229..09d2e8a10a4 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,9 +30,10 @@ #include <linux/kprobes.h> #include <linux/kexec.h> #include <linux/unwind.h> +#include <linux/uaccess.h> +#include <linux/bug.h> #include <asm/system.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <asm/atomic.h> #include <asm/debugreg.h> @@ -108,12 +109,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; -#ifdef CONFIG_STACK_UNWIND -static int call_trace = 1; -#else -#define call_trace (-1) -#endif +int kstack_depth_to_print = 12; #ifdef CONFIG_KALLSYMS void printk_address(unsigned long address) @@ -216,24 +212,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } -struct ops_and_data { - struct stacktrace_ops *ops; - void *data; -}; - -static int dump_trace_unwind(struct unwind_frame_info *info, void *context) -{ - struct ops_and_data *oad = (struct ops_and_data *)context; - int n = 0; - - while (unwind(info) == 0 && UNW_PC(info)) { - n++; - oad->ops->address(oad->data, UNW_PC(info)); - if (arch_unw_user_mode(info)) - break; - } - return n; -} +#define MSG(txt) ops->warning(data, txt) /* * x86-64 can have upto three kernel stacks: @@ -248,61 +227,24 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) return p > t && p < t + THREAD_SIZE - 3; } -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, + unsigned long *stack, struct stacktrace_ops *ops, void *data) { - const unsigned cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + const unsigned cpu = get_cpu(); + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; if (!tsk) tsk = current; - if (call_trace >= 0) { - int unw_ret = 0; - struct unwind_frame_info info; - struct ops_and_data oad = { .ops = ops, .data = data }; - - if (regs) { - if (unwind_init_frame_info(&info, tsk, regs) == 0) - unw_ret = dump_trace_unwind(&info, &oad); - } else if (tsk == current) - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); - else { - if (unwind_init_blocked(&info, tsk) == 0) - unw_ret = dump_trace_unwind(&info, &oad); - } - if (unw_ret > 0) { - if (call_trace == 1 && !arch_unw_user_mode(&info)) { - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", - UNW_PC(&info)); - if ((long)UNW_SP(&info) < 0) { - ops->warning(data, "Leftover inexact backtrace:\n"); - stack = (unsigned long *)UNW_SP(&info); - if (!stack) - return; - } else - ops->warning(data, "Full inexact backtrace again:\n"); - } else if (call_trace >= 1) - return; - else - ops->warning(data, "Full inexact backtrace again:\n"); - } else - ops->warning(data, "Inexact backtrace:\n"); - } if (!stack) { unsigned long dummy; stack = &dummy; if (tsk && tsk != current) stack = (unsigned long *)tsk->thread.rsp; } - /* - * Align the stack pointer on word boundary, later loops - * rely on that (and corruption / debug info bugs can cause - * unaligned values here): - */ - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); /* * Print function call entries within a stack. 'cond' is the @@ -312,9 +254,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s #define HANDLE_STACK(cond) \ do while (cond) { \ unsigned long addr = *stack++; \ - if (oops_in_progress ? \ - __kernel_text_address(addr) : \ - kernel_text_address(addr)) { \ + /* Use unlocked access here because except for NMIs \ + we should be already protected against module unloads */ \ + if (__kernel_text_address(addr)) { \ /* \ * If the address is either in the text segment of the \ * kernel, or in the region which contains vmalloc'ed \ @@ -377,9 +319,10 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s /* * This handles the process stack: */ - tinfo = current_thread_info(); + tinfo = task_thread_info(tsk); HANDLE_STACK (valid_stack_ptr(tinfo, stack)); #undef HANDLE_STACK + put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -516,30 +459,15 @@ bad: printk("\n"); } -void handle_BUG(struct pt_regs *regs) -{ - struct bug_frame f; - long len; - const char *prefix = ""; +int is_valid_bugaddr(unsigned long rip) +{ + unsigned short ud2; - if (user_mode(regs)) - return; - if (__copy_from_user(&f, (const void __user *) regs->rip, - sizeof(struct bug_frame))) - return; - if (f.filename >= 0 || - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) - return; - len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1; - if (len < 0 || len >= PATH_MAX) - f.filename = (int)(long)"unmapped filename"; - else if (len > 50) { - f.filename += len - 50; - prefix = "..."; - } - printk("----------- [cut here ] --------- [please bite here ] ---------\n"); - printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line); -} + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) + return 0; + + return ud2 == 0x0b0f; +} #ifdef CONFIG_BUG void out_of_line_bug(void) @@ -619,7 +547,9 @@ void die(const char * str, struct pt_regs * regs, long err) { unsigned long flags = oops_begin(); - handle_BUG(regs); + if (!user_mode(regs)) + report_bug(regs->rip); + __die(str, regs, err); oops_end(flags); do_exit(SIGSEGV); @@ -786,8 +716,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); - printk(KERN_EMERG "You probably have a hardware problem with your " - "RAM chips\n"); + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); @@ -1193,21 +1122,3 @@ static int __init kstack_setup(char *s) return 0; } early_param("kstack", kstack_setup); - -#ifdef CONFIG_STACK_UNWIND -static int __init call_trace_setup(char *s) -{ - if (!s) - return -EINVAL; - if (strcmp(s, "old") == 0) - call_trace = -1; - else if (strcmp(s, "both") == 0) - call_trace = 0; - else if (strcmp(s, "newfallback") == 0) - call_trace = 1; - else if (strcmp(s, "new") == 0) - call_trace = 2; - return 0; -} -early_param("call_trace", call_trace_setup); -#endif diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d9534e750d4..1e54ddf2338 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -13,6 +13,7 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; +_proxy_pda = 0; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ @@ -51,14 +52,7 @@ SECTIONS RODATA -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(8); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif + BUG_TABLE . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ @@ -227,9 +221,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) -#ifndef CONFIG_UNWIND_INFO *(.eh_frame) -#endif } STABS_DEBUG diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 92546c1526f..2433d6fc68b 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -42,6 +42,7 @@ #include <asm/topology.h> #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __syscall_clobber "r11","rcx","memory" int __sysctl_vsyscall __section_sysctl_vsyscall = 1; seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; @@ -224,8 +225,7 @@ out: static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) + void __user *newval, size_t newlen) { return -ENOSYS; } @@ -274,7 +274,6 @@ static void __cpuinit cpu_vsyscall_init(void *arg) vsyscall_set_cpu(raw_smp_processor_id()); } -#ifdef CONFIG_HOTPLUG_CPU static int __cpuinit cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) { @@ -283,13 +282,13 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); return NOTIFY_DONE; } -#endif static void __init map_vsyscall(void) { extern char __vsyscall_0; unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c index c493735218d..bc503f50690 100644 --- a/arch/x86_64/lib/csum-partial.c +++ b/arch/x86_64/lib/csum-partial.c @@ -9,8 +9,6 @@ #include <linux/module.h> #include <asm/checksum.h> -#define __force_inline inline __attribute__((always_inline)) - static inline unsigned short from32to16(unsigned a) { unsigned short b = a >> 16; @@ -33,7 +31,7 @@ static inline unsigned short from32to16(unsigned a) * Unrolling to an 128 bytes inner loop. * Using interleaving with more registers to break the carry chains. */ -static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) +static unsigned do_csum(const unsigned char *buff, unsigned len) { unsigned odd, count; unsigned long result = 0; @@ -132,9 +130,10 @@ static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) * * it's best to have buff aligned on a 64-bit boundary */ -unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum) +__wsum csum_partial(const void *buff, int len, __wsum sum) { - return add32_with_carry(do_csum(buff, len), sum); + return (__force __wsum)add32_with_carry(do_csum(buff, len), + (__force u32)sum); } EXPORT_SYMBOL(csum_partial); @@ -143,7 +142,7 @@ EXPORT_SYMBOL(csum_partial); * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ -unsigned short ip_compute_csum(unsigned char * buff, int len) +__sum16 ip_compute_csum(const void *buff, int len) { return csum_fold(csum_partial(buff,len,0)); } diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c index b1320ec5842..fd42a4a095f 100644 --- a/arch/x86_64/lib/csum-wrappers.c +++ b/arch/x86_64/lib/csum-wrappers.c @@ -18,9 +18,9 @@ * Returns an 32bit unfolded checksum of the buffer. * src and dst are best aligned to 64bits. */ -unsigned int -csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst, - int len, unsigned int isum, int *errp) +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum isum, int *errp) { might_sleep(); *errp = 0; @@ -34,17 +34,19 @@ csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst, if (unlikely((unsigned long)src & 6)) { while (((unsigned long)src & 6) && len >= 2) { __u16 val16; - *errp = __get_user(val16, (__u16 __user *)src); + *errp = __get_user(val16, (const __u16 __user *)src); if (*errp) return isum; *(__u16 *)dst = val16; - isum = add32_with_carry(isum, val16); + isum = (__force __wsum)add32_with_carry( + (__force unsigned)isum, val16); src += 2; dst += 2; len -= 2; } } - isum = csum_partial_copy_generic((__force void *)src,dst,len,isum,errp,NULL); + isum = csum_partial_copy_generic((__force const void *)src, + dst, len, isum, errp, NULL); if (likely(*errp == 0)) return isum; } @@ -66,9 +68,9 @@ EXPORT_SYMBOL(csum_partial_copy_from_user); * Returns an 32bit unfolded checksum of the buffer. * src and dst are best aligned to 64bits. */ -unsigned int -csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst, - int len, unsigned int isum, int *errp) +__wsum +csum_partial_copy_to_user(const void *src, void __user *dst, + int len, __wsum isum, int *errp) { might_sleep(); if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { @@ -79,7 +81,8 @@ csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst, if (unlikely((unsigned long)dst & 6)) { while (((unsigned long)dst & 6) && len >= 2) { __u16 val16 = *(__u16 *)src; - isum = add32_with_carry(isum, val16); + isum = (__force __wsum)add32_with_carry( + (__force unsigned)isum, val16); *errp = __put_user(val16, (__u16 __user *)dst); if (*errp) return isum; @@ -104,19 +107,21 @@ EXPORT_SYMBOL(csum_partial_copy_to_user); * * Returns an 32bit unfolded checksum of the buffer. */ -unsigned int -csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len, unsigned int sum) +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) { return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); } EXPORT_SYMBOL(csum_partial_copy_nocheck); -unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, - __u32 len, unsigned short proto, unsigned int sum) +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, __wsum sum) { __u64 rest, sum64; - rest = (__u64)htonl(len) + (__u64)htons(proto) + (__u64)sum; + rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) + + (__force __u64)sum; asm(" addq (%[saddr]),%[sum]\n" " adcq 8(%[saddr]),%[sum]\n" " adcq (%[daddr]),%[sum]\n" @@ -124,7 +129,7 @@ unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, " adcq $0,%[sum]\n" : [sum] "=r" (sum64) : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr)); - return csum_fold(add32_with_carry(sum64 & 0xffffffff, sum64>>32)); + return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32)); } EXPORT_SYMBOL(csum_ipv6_magic); diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index 50be90975d0..2dbebd30834 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); + __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); } EXPORT_SYMBOL(__const_udelay); void __udelay(unsigned long usecs) { - __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } EXPORT_SYMBOL(__udelay); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 3751b4788e2..a65fc6f1dca 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,9 +23,9 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/kprobes.h> +#include <linux/uaccess.h> #include <asm/system.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/smp.h> #include <asm/tlbflush.h> @@ -96,7 +96,7 @@ void bust_spinlocks(int yes) static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned long error_code) { - unsigned char __user *instr; + unsigned char *instr; int scan_more = 1; int prefetch = 0; unsigned char *max_instr; @@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, unsigned char instr_hi; unsigned char instr_lo; - if (__get_user(opcode, (char __user *)instr)) + if (probe_kernel_address(instr, opcode)) break; instr_hi = opcode & 0xf0; @@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ scan_more = 0; - if (__get_user(opcode, (char __user *)instr)) + if (probe_kernel_address(instr, opcode)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); @@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, static int bad_address(void *p) { unsigned long dummy; - return __get_user(dummy, (unsigned long __user *)p); + return probe_kernel_address((unsigned long *)p, dummy); } void dump_pagetable(unsigned long address) diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 4c0c00ef3ca..2968b90ef8a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -730,14 +730,15 @@ static __init int x8664_sysctl_init(void) __initcall(x8664_sysctl_init); #endif -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only covers the 64bit vsyscall page now. 32bit has a real VMA now and does not need special handling anymore. */ static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_END, - .vm_page_prot = PAGE_READONLY + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), + .vm_page_prot = PAGE_READONLY_EXEC, + .vm_flags = VM_READ | VM_EXEC }; struct vm_area_struct *get_gate_vma(struct task_struct *tsk) diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 3e231d762aa..ccb91dd996a 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -61,34 +61,40 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } - -static void flush_kernel_map(void *address) +static void cache_flush_page(void *adr) { - if (0 && address && cpu_has_clflush) { - /* is this worth it? */ - int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (address + i)); - } else - asm volatile("wbinvd":::"memory"); - if (address) - __flush_tlb_one(address); - else - __flush_tlb_all(); + int i; + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + asm volatile("clflush (%0)" :: "r" (adr + i)); } +static void flush_kernel_map(void *arg) +{ + struct list_head *l = (struct list_head *)arg; + struct page *pg; + + /* When clflush is available always use it because it is + much cheaper than WBINVD */ + if (!cpu_has_clflush) + asm volatile("wbinvd" ::: "memory"); + list_for_each_entry(pg, l, lru) { + void *adr = page_address(pg); + if (cpu_has_clflush) + cache_flush_page(adr); + __flush_tlb_one(adr); + } +} -static inline void flush_map(unsigned long address) +static inline void flush_map(struct list_head *l) { - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); + on_each_cpu(flush_kernel_map, l, 1, 1); } -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ static inline void save_page(struct page *fpage) { - fpage->lru.next = (struct list_head *)deferred_pages; - deferred_pages = fpage; + list_add(&fpage->lru, &deferred_pages); } /* @@ -207,18 +213,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) void global_flush_tlb(void) { - struct page *dpage; + struct page *pg, *next; + struct list_head l; down_read(&init_mm.mmap_sem); - dpage = xchg(&deferred_pages, NULL); + list_replace_init(&deferred_pages, &l); up_read(&init_mm.mmap_sem); - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); - while (dpage) { - struct page *tmp = dpage; - dpage = (struct page *)dpage->lru.next; - ClearPagePrivate(tmp); - __free_page(tmp); + flush_map(&l); + + list_for_each_entry_safe(pg, next, &l, lru) { + ClearPagePrivate(pg); + __free_page(pg); } } |