summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/ia32/ia32entry.S32
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/frame.h6
-rw-r--r--arch/x86/include/asm/futex.h22
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/perf_event_p4.h1
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h17
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h2
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/unistd_32.h5
-rw-r--r--arch/x86/include/asm/unistd_64.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/acpi/boot.c14
-rw-r--r--arch/x86/kernel/apb_timer.c2
-rw-r--r--arch/x86/kernel/apic/apic.c39
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/asm-offsets.c65
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c13
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c170
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c175
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c417
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c97
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c19
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/dumpstack.c25
-rw-r--r--arch/x86/kernel/early-quirks.c16
-rw-r--r--arch/x86/kernel/entry_32.S11
-rw-r--r--arch/x86/kernel/entry_64.S8
-rw-r--r--arch/x86/kernel/ftrace.c15
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/ioport.c20
-rw-r--r--arch/x86/kernel/kgdb.c9
-rw-r--r--arch/x86/kernel/kprobes.c8
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/smpboot.c44
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/trace.h8
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/checksum_32.S63
-rw-r--r--arch/x86/lib/rwsem_64.S56
-rw-r--r--arch/x86/lib/semaphore_32.S38
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S27
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/numa_64.c6
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/pci/ce4100.c7
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c3
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/xen/Kconfig8
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c82
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--arch/x86/xen/xen-ops.h2
88 files changed, 1871 insertions, 814 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e7ef769eabf..a42660c7356 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,6 +64,8 @@ config X86
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
select HAVE_SPARSE_IRQ
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_FIND_NEXT_BIT
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
select GENERIC_IRQ_SHOW
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 283c5a6a03a..ed47e6e1747 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,11 +294,6 @@ config X86_GENERIC
endif
-config X86_CPU
- def_bool y
- select GENERIC_FIND_FIRST_BIT
- select GENERIC_FIND_NEXT_BIT
-
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5f..46a82388243 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
if (fseek(f, -4L, SEEK_END)) {
perror(argv[1]);
}
- fread(&olen, sizeof olen, 1, f);
+
+ if (fread(&olen, sizeof(olen), 1, f) != 1) {
+ perror(argv[1]);
+ return 1;
+ }
+
ilen = ftell(f);
olen = getle32(&olen);
fclose(f);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c339..430312ba6e3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -25,6 +25,8 @@
#define sysretl_audit ia32_ret_from_sys_call
#endif
+ .section .entry.text, "ax"
+
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
@@ -126,26 +128,20 @@ ENTRY(ia32_sysenter_target)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %ebp,%ebp /* zero extension */
- pushq $__USER32_DS
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__USER32_DS
/*CFI_REL_OFFSET ss,0*/
- pushq %rbp
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rbp
CFI_REL_OFFSET rsp,0
- pushfq
- CFI_ADJUST_CFA_OFFSET 8
+ pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
CFI_REGISTER rip,r10
- pushq $__USER32_CS
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
movl %eax, %eax
- pushq %r10
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %r10
CFI_REL_OFFSET rip,0
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax
cld
SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
@@ -182,11 +178,9 @@ sysexit_from_sys_call:
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
- popfq
- CFI_ADJUST_CFA_OFFSET -8
+ popfq_cfi
/*CFI_RESTORE rflags*/
- popq %rcx /* User %esp */
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx /* User %esp */
CFI_REGISTER rsp,rcx
TRACE_IRQS_ON
ENABLE_INTERRUPTS_SYSEXIT32
@@ -421,8 +415,7 @@ ENTRY(ia32_syscall)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%eax
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
@@ -851,4 +844,7 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad sys_name_to_handle_at
+ .quad compat_sys_open_by_handle_at
+ .quad compat_sys_clock_adjtime
ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 211ca3f7fd1..4ea15ca89b2 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -88,6 +88,7 @@ extern int acpi_disabled;
extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
+extern int acpi_fix_pin2_polarity;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 00000000000..e656ad8c0a2
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_CE4100_H_
+#define _ASM_CE4100_H_
+
+int ce4100_pci_init(void);
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea08e8..91f3e087cf2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
+#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 06850a7194e..2c6fc9e6281 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -7,14 +7,12 @@
frame pointer later */
#ifdef CONFIG_FRAME_POINTER
.macro FRAME
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp,0
movl %esp,%ebp
.endm
.macro ENDFRAME
- popl %ebp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebp
CFI_RESTORE ebp
.endm
#else
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 1f11ce44e95..d09bb03653f 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -37,7 +37,7 @@
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
- int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
+ int ret = 0;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
/* Real i386 machines have no cmpxchg instruction */
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return -ENOSYS;
#endif
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
+ asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
"2:\t.section .fixup, \"ax\"\n"
- "3:\tmov %2, %0\n"
+ "3:\tmov %3, %0\n"
"\tjmp 2b\n"
"\t.previous\n"
_ASM_EXTABLE(1b, 3b)
- : "=a" (oldval), "+m" (*uaddr)
- : "i" (-EFAULT), "r" (newval), "0" (oldval)
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+ : "i" (-EFAULT), "r" (newval), "1" (oldval)
: "memory"
);
- return oldval;
+ *uval = oldval;
+ return ret;
}
#endif
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index ca242d35e87..518bbbb9ee5 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -13,7 +13,6 @@ enum die_val {
DIE_PANIC,
DIE_NMI,
DIE_DIE,
- DIE_NMIWATCHDOG,
DIE_KERNELDEBUG,
DIE_TRAP,
DIE_GPF,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998..823d4822340 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,11 @@
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
+#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
+#define NHM_C3_AUTO_DEMOTE (1UL << 25)
+#define NHM_C1_AUTO_DEMOTE (1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
@@ -47,6 +52,9 @@
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c76f5b92b84..07f46016d3f 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,7 +7,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
-extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index e2f6a99f14a..cc29086e30c 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -22,6 +22,7 @@
#define ARCH_P4_CNTRVAL_BITS (40)
#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
#define P4_ESCR_EVENT_MASK 0x7e000000U
#define P4_ESCR_EVENT_SHIFT 25
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa18..4c25ab48257 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
int x86_cache_alignment; /* In bytes */
int x86_power;
unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b6..df4cd32b4cc 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
#endif
#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
#include <asm/asm.h>
-struct rwsem_waiter;
-
-extern asmregparm struct rw_semaphore *
- rwsem_down_read_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_down_write_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_wake(struct rw_semaphore *);
-extern asmregparm struct rw_semaphore *
- rwsem_downgrade_wake(struct rw_semaphore *sem);
-
/*
- * the semaphore definition
- *
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-typedef signed long rwsem_count_t;
-
-struct rw_semaphore {
- rwsem_count_t count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
/*
* lock for reading
*/
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t result, tmp;
+ long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
/*
* implement atomic add functionality
*/
-static inline void rwsem_atomic_add(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
{
asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
: "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
/*
* implement exchange and add functionality
*/
-static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
- rwsem_count_t tmp = delta;
+ long tmp = delta;
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
return tmp + delta;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f469513677..99fa8b47381 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,12 +17,24 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
+#include <asm/cpufeature.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
+static inline bool cpu_has_ht_siblings(void)
+{
+ bool has_siblings = false;
+#ifdef CONFIG_SMP
+ has_siblings = cpu_has_ht && smp_num_siblings > 1;
+#endif
+ return has_siblings;
+}
+
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
@@ -36,6 +48,11 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 6c22bf353f2..725b7783199 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0;
+ *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3ea878..12569e691ce 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -98,8 +98,6 @@ do { \
*/
#define HAVE_DISABLE_HLT
#else
-#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
-#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
/* frame pointer must be last for get_wchan */
#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0..ffaf183c619 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,13 @@
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
+#define __NR_name_to_handle_at 341
+#define __NR_open_by_handle_at 342
+#define __NR_clock_adjtime 343
#ifdef __KERNEL__
-#define NR_syscalls 341
+#define NR_syscalls 344
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715..5466bea670e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 303
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 304
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+#define __NR_clock_adjtime 305
+__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a43..3e094af443c 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
- unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
+ unsigned int base_dest_nodeid:15; /* nasid of the */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae4025..8508bfe5229 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
#endif
static inline int
-HYPERVISOR_suspend(unsigned long srec)
+HYPERVISOR_suspend(unsigned long start_info_mfn)
{
- return _hypercall3(int, sched_op, SCHEDOP_shutdown,
- SHUTDOWN_suspend, srec);
+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
+
+ /*
+ * For a PV guest the tools require that the start_info mfn be
+ * present in rdx/edx when the hypercall is made. Per the
+ * hypercall calling convention this is the third hypercall
+ * argument, which is start_info_mfn here.
+ */
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
}
static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a3..c61934fbf22 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
-#define FOREIGN_FRAME_BIT (1UL<<31)
+#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
+#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
/* Maximum amount of memory we can handle in a domain in pages */
#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page);
extern int m2p_remove_override(struct page *page);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+#ifdef CONFIG_XEN_DEBUG_FS
+extern int p2m_dump_show(struct seq_file *m, void *v);
+#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
mfn = get_phys_to_machine(pfn);
if (mfn != INVALID_P2M_ENTRY)
- mfn &= ~FOREIGN_FRAME_BIT;
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
return mfn;
}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
+ int ret = 0;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
+ if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ pfn = ~0;
+ goto try_override;
+ }
pfn = 0;
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
- __get_user(pfn, &machine_to_phys_mapping[mfn]);
-
- /*
- * If this appears to be a foreign mfn (because the pfn
- * doesn't map back to the mfn), then check the local override
- * table to see if there's a better pfn to use.
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+try_override:
+ /* ret might be < 0 if there are no entries in the m2p for mfn */
+ if (ret < 0)
+ pfn = ~0;
+ else if (get_phys_to_machine(pfn) != mfn)
+ /*
+ * If this appears to be a foreign mfn (because the pfn
+ * doesn't map back to the mfn), then check the local override
+ * table to see if there's a better pfn to use.
+ *
+ * m2p_find_override_pfn returns ~0 if it doesn't find anything.
+ */
+ pfn = m2p_find_override_pfn(mfn, ~0);
+
+ /*
+ * pfn is ~0 if there are no entries in the m2p for mfn or if the
+ * entry doesn't map back to the mfn and m2p_override doesn't have a
+ * valid entry for it.
*/
- if (get_phys_to_machine(pfn) != mfn)
- pfn = m2p_find_override_pfn(mfn, pfn);
+ if (pfn == ~0 &&
+ get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+ pfn = mfn;
return pfn;
}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3eaf8d..aa862098916 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
* its own functions.
*/
struct xen_pci_frontend_ops {
- int (*enable_msi)(struct pci_dev *dev, int **vectors);
+ int (*enable_msi)(struct pci_dev *dev, int vectors[]);
void (*disable_msi)(struct pci_dev *dev);
- int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+ int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
void (*disable_msix)(struct pci_dev *dev);
};
extern struct xen_pci_frontend_ops *xen_pci_frontend;
static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
- int **vectors)
+ int vectors[])
{
if (xen_pci_frontend && xen_pci_frontend->enable_msi)
return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
xen_pci_frontend->disable_msi(dev);
}
static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
- int **vectors, int nvec)
+ int vectors[], int nvec)
{
if (xen_pci_frontend && xen_pci_frontend->enable_msix)
return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a71137983..3e6e2d68f76 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
int acpi_skip_timer_override __initdata;
int acpi_use_timer_override __initdata;
+int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -415,10 +416,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
return 0;
}
- if (acpi_skip_timer_override &&
- intsrc->source_irq == 0 && intsrc->global_irq == 2) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
- return 0;
+ if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+ if (acpi_skip_timer_override) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
+ }
+ if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+ intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
+ printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
+ }
}
mp_override_legacy_irq(intsrc->source_irq,
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51ef31a89be..51d4e166306 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -284,7 +284,7 @@ static int __init apbt_clockevent_register(void)
memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
- apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
+ adev->evt.rating = APBT_CLOCKEVENT_RATING - 100;
global_clock_event = &adev->evt;
printk(KERN_DEBUG "%s clockevent registered as global\n",
global_clock_event->name);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f0e079823c4..48dcd2e83b4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1931,17 +1931,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
- /*
- * Validate version
- */
- if (version == 0x0) {
- pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
if (num_processors >= nr_cpu_ids) {
int max = nr_cpu_ids;
int thiscpu = max + disabled_cpus;
@@ -1955,22 +1944,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
num_processors++;
- cpu = cpumask_next_zero(-1, cpu_present_mask);
-
- if (version != apic_version[boot_cpu_physical_apicid])
- WARN_ONCE(1,
- "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
-
- physid_set(apicid, phys_cpu_present_map);
if (apicid == boot_cpu_physical_apicid) {
/*
* x86_bios_cpu_apicid is required to have processors listed
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
+ * boot_cpu_init() already hold bit 0 in cpu_present_mask
+ * for BSP.
*/
cpu = 0;
+ } else
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
+ version = 0x10;
}
+ apic_version[apicid] = version;
+
+ if (version != apic_version[boot_cpu_physical_apicid]) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ apic_version[boot_cpu_physical_apicid], cpu, version);
+ }
+
+ physid_set(apicid, phys_cpu_present_map);
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 79fd43ca6f9..c4e557a1ebb 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
arch_spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
- dump_stack();
arch_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return NOTIFY_STOP;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f4..4f13fafc526 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,70 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/crypto.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/sigframe.h>
+#include <asm/bootparam.h>
+#include <asm/suspend.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/xen.h>
+#endif
+
#ifdef CONFIG_X86_32
# include "asm-offsets_32.c"
#else
# include "asm-offsets_64.c"
#endif
+
+void common(void) {
+ BLANK();
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+
+ BLANK();
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+ BLANK();
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
+
+#ifdef CONFIG_PARAVIRT
+ BLANK();
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
+ OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
+
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..c29d631af6f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
#include <asm/ucontext.h>
-#include <asm/sigframe.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/bootparam.h>
-#include <asm/elf.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_task, thread_info, task);
- OFFSET(TI_exec_domain, thread_info, exec_domain);
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
- OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
- OFFSET(GDS_size, desc_ptr, size);
- OFFSET(GDS_address, desc_ptr, address);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
- OFFSET(pbe_address, pbe, address);
- OFFSET(pbe_orig_address, pbe, orig_address);
- OFFSET(pbe_next, pbe, next);
-
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
- DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
-
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
-
-#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
- OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
-#endif
-
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#endif
-
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
-
- BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..e72a1194af2 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-#define COMPILE_OFFSETS
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
#include <asm/ia32.h>
-#include <asm/bootparam.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
-
-#include <asm/sigframe.h>
#define __NO_STUBS 1
#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
int main(void)
{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
-#ifdef CONFIG_IA32_EMULATION
- ENTRY(sysenter_return);
-#endif
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
- OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+ BLANK();
#endif
-
#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ BLANK();
+
+#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
ENTRY(ax);
ENTRY(bx);
ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
ENTRY(ip);
BLANK();
#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
+
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
BLANK();
#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- BLANK();
-#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
+
+#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(bx);
ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
ENTRY(flags);
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
+
+#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
ENTRY(cr0);
ENTRY(cr2);
ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
ENTRY(cr8);
BLANK();
#undef ENTRY
- DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
- BLANK();
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#undef ENTRY
-#endif
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+
return 0;
}
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a38917951..452932d3473 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
addr += size;
}
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
- num_scan_areas);
+ if (num_scan_areas)
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
}
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
{
check_for_bios_corruption();
schedule_delayed_work(&bios_check_work,
- round_jiffies_relative(corruption_check_period*HZ));
+ round_jiffies_relative(corruption_check_period*HZ));
}
static int start_periodic_check_for_corruption(void)
{
- if (!memory_corruption_check || corruption_check_period == 0)
+ if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
return 0;
printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396b..5d98c46f876 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,7 +675,7 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
-#ifdef PROCESSOR_SELECT
+#ifdef CONFIG_PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
#endif
@@ -687,7 +687,7 @@ void __init early_cpu_init(void)
cpu_devs[count] = cpudev;
count++;
-#ifdef PROCESSOR_SELECT
+#ifdef CONFIG_PROCESSOR_SELECT
{
unsigned int j;
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac747f6..52c93648e49 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x06) {
if (cpu_has(c, X86_FEATURE_EST))
- printk(KERN_WARNING PFX "Warning: EST-capable CPU "
- "detected. The acpi-cpufreq module offers "
- "voltage scaling in addition of frequency "
+ printk_once(KERN_WARNING PFX "Warning: EST-capable "
+ "CPU detected. The acpi-cpufreq module offers "
+ "voltage scaling in addition to frequency "
"scaling. You should use that instead of "
"p4-clockmod, if possible.\n");
switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f279..4a5a42b842a 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
cmd_incomplete:
iowrite16(0, &pcch_hdr->status);
spin_unlock(&pcc_lock);
- return -EINVAL;
+ return 0;
}
static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65e59b..c567dec854f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1537,6 +1537,7 @@ static struct notifier_block cpb_nb = {
static int __cpuinit powernowk8_init(void)
{
unsigned int i, supported_cpus = 0, cpu;
+ int rv;
for_each_online_cpu(i) {
int rc;
@@ -1555,14 +1556,14 @@ static int __cpuinit powernowk8_init(void)
cpb_capable = true;
- register_cpu_notifier(&cpb_nb);
-
msrs = msrs_alloc();
if (!msrs) {
printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
return -ENOMEM;
}
+ register_cpu_notifier(&cpb_nb);
+
rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
for_each_cpu(cpu, cpu_online_mask) {
@@ -1574,7 +1575,13 @@ static int __cpuinit powernowk8_init(void)
(cpb_enabled ? "on" : "off"));
}
- return cpufreq_register_driver(&cpufreq_amd64_driver);
+ rv = cpufreq_register_driver(&cpufreq_amd64_driver);
+ if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
+ unregister_cpu_notifier(&cpb_nb);
+ msrs_free(msrs);
+ msrs = NULL;
+ }
+ return rv;
}
/* driver entry point for term */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8e..5419a263ebd 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -732,11 +732,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- for_each_cpu(i, c->llc_shared_map) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, c->llc_shared_map) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52ac..167f97b5596 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
-#ifdef CONFIG_SMP
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(c->llc_shared_map);
+ i = cpumask_first(cpu_llc_shared_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea69..26604188aa4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>
+#include <asm/smp.h>
#if 0
#undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
+struct intel_percore;
+
#define MAX_LBR_ENTRIES 16
struct cpu_hw_events {
@@ -128,6 +131,13 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
/*
+ * Intel percore register state.
+ * Coordinate shared resources between HT threads.
+ */
+ int percore_used; /* Used by this CPU? */
+ struct intel_percore *per_core;
+
+ /*
* AMD specific bits
*/
struct amd_nb *amd_nb;
@@ -166,8 +176,10 @@ struct cpu_hw_events {
/*
* Constraint on the Event code + UMask
*/
-#define PEBS_EVENT_CONSTRAINT(c, n) \
+#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define PEBS_EVENT_CONSTRAINT(c, n) \
+ INTEL_UEVENT_CONSTRAINT(c, n)
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -175,6 +187,28 @@ struct cpu_hw_events {
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->weight; (e)++)
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+ unsigned int event;
+ unsigned int msr;
+ u64 config_mask;
+ u64 valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) { \
+ .event = (e), \
+ .msr = (ms), \
+ .config_mask = (m), \
+ .valid_mask = (vm), \
+ }
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
union perf_capabilities {
struct {
u64 lbr_format : 6;
@@ -219,6 +253,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
+ struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;
@@ -247,6 +282,11 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+
+ /*
+ * Extra registers for events
+ */
+ struct extra_reg *extra_regs;
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
/*
* Propagate event elapsed time into the generic event.
@@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event)
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(hwc->event_base + idx, new_raw_count);
+ rdmsrl(hwc->event_base, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -321,6 +365,49 @@ again:
return new_raw_count;
}
+/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
+static inline int x86_pmu_addr_offset(int index)
+{
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ return index << 1;
+ return index;
+}
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+ return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+ return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+}
+
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+ struct extra_reg *er;
+
+ event->hw.extra_reg = 0;
+ event->hw.extra_config = 0;
+
+ if (!x86_pmu.extra_regs)
+ return 0;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+ event->hw.extra_reg = er->msr;
+ event->hw.extra_config = event->attr.config1;
+ break;
+ }
+ return 0;
+}
+
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);
@@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void)
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}
@@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void)
eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_evntsel_nmi(x86_pmu_config_addr(i));
i = x86_pmu.num_counters;
perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
return false;
}
@@ -360,8 +447,8 @@ static void release_pmc_hardware(void)
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
- release_perfctr_nmi(x86_pmu.perfctr + i);
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+ release_evntsel_nmi(x86_pmu_config_addr(i));
}
}
@@ -382,7 +469,7 @@ static bool check_hw_exists(void)
* complain and bail.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu.eventsel + i;
+ reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
@@ -407,8 +494,8 @@ static bool check_hw_exists(void)
* that don't trap on the MSR access and always return 0s.
*/
val = 0xabcdUL;
- ret = checking_wrmsrl(x86_pmu.perfctr, val);
- ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+ ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+ ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
if (ret || val != val_new)
goto msr_fail;
@@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void)
}
static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
+ struct perf_event_attr *attr = &event->attr;
unsigned int cache_type, cache_op, cache_result;
u64 config, val;
@@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
return -EINVAL;
hwc->config |= val;
-
- return 0;
+ attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+ return x86_pmu_extra_regs(val, event);
}
static int x86_setup_perfctr(struct perf_event *event)
@@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event)
}
if (attr->type == PERF_TYPE_RAW)
- return 0;
+ return x86_pmu_extra_regs(event->attr.config, event);
if (attr->type == PERF_TYPE_HW_CACHE)
- return set_ext_hw_attr(hwc, attr);
+ return set_ext_hw_attr(hwc, event);
if (attr->config >= x86_pmu.max_events)
return -EINVAL;
@@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void)
if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu.eventsel + idx, val);
+ rdmsrl(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ wrmsrl(x86_pmu_config_addr(idx), val);
}
}
@@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu)
x86_pmu.disable_all();
}
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ u64 enable_mask)
+{
+ if (hwc->extra_reg)
+ wrmsrl(hwc->extra_reg, hwc->extra_config);
+ wrmsrl(hwc->config_base, hwc->config | enable_mask);
+}
+
static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- u64 val;
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask))
continue;
- val = event->hw.config;
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}
@@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
} else {
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ hwc->event_base = x86_pmu_event_addr(hwc->idx);
}
}
@@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu.enable_all(added);
}
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
- u64 enable_mask)
-{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+ wrmsrl(hwc->config_base, hwc->config);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Due to erratum on certan cpu we need
@@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event)
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}
@@ -1113,8 +1195,8 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu.perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrl(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
@@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1608,7 +1690,7 @@ out:
return ret;
}
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a603..461f62bbd77 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
/*
* AMD64 events are detected based on their event codes.
*/
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003 FP PERF_CTL[3]
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int event_code = amd_get_event_code(&event->hw);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ default:
+ return &amd_f15_PMC53;
+ }
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* not yet implemented */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+ .name = "AMD Family 15h",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_F15H_PERF_CTL,
+ .perfctr = MSR_F15H_PERF_CTR,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 6,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints_f15h,
+ /* nortbridge counters not yet implemented: */
+#if 0
+ .put_event_constraints = amd_put_event_constraints,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+#endif
+};
+
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
- x86_pmu = amd_pmu;
+ /*
+ * If core performance counter extensions exists, it must be
+ * family 15h, otherwise fail. See x86_pmu_addr_offset().
+ */
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ if (!cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu_f15h;
+ break;
+ default:
+ if (cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu;
+ break;
+ }
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79..8fc2b2cee1d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
#ifdef CONFIG_CPU_SUP_INTEL
+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+ int ref; /* reference count */
+ unsigned int extra_reg; /* extra MSR number */
+ u64 extra_config; /* extra MSR config */
+};
+
+/*
+ * Per core state
+ * This used to coordinate shared registers for HT threads.
+ */
+struct intel_percore {
+ raw_spinlock_t lock; /* protect structure */
+ struct er_account regs[MAX_EXTRA_REGS];
+ int refcnt; /* number of threads */
+ unsigned core_id;
+};
+
/*
* Intel PerfMon, used on Core and later.
*/
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
EVENT_CONSTRAINT_END
};
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_westmere_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_snb_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
+ INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ INTEL_EVENT_CONSTRAINT(0xbb, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_gen_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+static __initconst const u64 snb_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
+ [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ /*
+ * TBD: Need Off-core Response Performance Monitoring support
+ */
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
- [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
},
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
- [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
- [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
},
},
[ C(DTLB) ] = {
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
};
+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD (1 << 0)
+#define DMND_RFO (1 << 1)
+#define DMND_WB (1 << 3)
+#define PF_DATA_RD (1 << 4)
+#define PF_DATA_RFO (1 << 5)
+#define RESP_UNCORE_HIT (1 << 8)
+#define RESP_MISS (0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+ },
+ }
+};
+
static __initconst const u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
- [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
- [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
- [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void)
printk("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
- checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+ checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
+ checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event)
}
static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+ struct event_constraint *c;
+ struct intel_percore *pc;
+ struct er_account *era;
+ int i;
+ int free_slot;
+ int found;
+
+ if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+ return NULL;
+
+ for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+ if (e != c->code)
+ continue;
+
+ /*
+ * Allocate resource per core.
+ */
+ pc = cpuc->per_core;
+ if (!pc)
+ break;
+ c = &emptyconstraint;
+ raw_spin_lock(&pc->lock);
+ free_slot = -1;
+ found = 0;
+ for (i = 0; i < MAX_EXTRA_REGS; i++) {
+ era = &pc->regs[i];
+ if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+ /* Allow sharing same config */
+ if (hwc->extra_config == era->extra_config) {
+ era->ref++;
+ cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
+ c = NULL;
+ }
+ /* else conflict */
+ found = 1;
+ break;
+ } else if (era->ref == 0 && free_slot == -1)
+ free_slot = i;
+ }
+ if (!found && free_slot != -1) {
+ era = &pc->regs[free_slot];
+ era->ref = 1;
+ era->extra_reg = hwc->extra_reg;
+ era->extra_config = hwc->extra_config;
+ cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
+ c = NULL;
+ }
+ raw_spin_unlock(&pc->lock);
+ return c;
+ }
+
+ return NULL;
+}
+
+static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct event_constraint *c;
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
if (c)
return c;
+ c = intel_percore_constraints(cpuc, event);
+ if (c)
+ return c;
+
return x86_get_event_constraints(cpuc, event);
}
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct extra_reg *er;
+ struct intel_percore *pc;
+ struct er_account *era;
+ struct hw_perf_event *hwc = &event->hw;
+ int i, allref;
+
+ if (!cpuc->percore_used)
+ return;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (hwc->config & er->config_mask))
+ continue;
+
+ pc = cpuc->per_core;
+ raw_spin_lock(&pc->lock);
+ for (i = 0; i < MAX_EXTRA_REGS; i++) {
+ era = &pc->regs[i];
+ if (era->ref > 0 &&
+ era->extra_config == hwc->extra_config &&
+ era->extra_reg == er->msr) {
+ era->ref--;
+ hwc->extra_alloc = 0;
+ break;
+ }
+ }
+ allref = 0;
+ for (i = 0; i < MAX_EXTRA_REGS; i++)
+ allref += pc->regs[i].ref;
+ if (allref == 0)
+ cpuc->percore_used = 0;
+ raw_spin_unlock(&pc->lock);
+ break;
+ }
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
};
+static int intel_pmu_cpu_prepare(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ if (!cpu_has_ht_siblings())
+ return NOTIFY_OK;
+
+ cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!cpuc->per_core)
+ return NOTIFY_BAD;
+
+ raw_spin_lock_init(&cpuc->per_core->lock);
+ cpuc->per_core->core_id = -1;
+ return NOTIFY_OK;
+}
+
static void intel_pmu_cpu_starting(int cpu)
{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int core_id = topology_core_id(cpu);
+ int i;
+
init_debug_store_on_cpu(cpu);
/*
* Deal with CPUs that don't clear their LBRs on power-up.
*/
intel_pmu_lbr_reset();
+
+ if (!cpu_has_ht_siblings())
+ return;
+
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+ if (pc && pc->core_id == core_id) {
+ kfree(cpuc->per_core);
+ cpuc->per_core = pc;
+ break;
+ }
+ }
+
+ cpuc->per_core->core_id = core_id;
+ cpuc->per_core->refcnt++;
}
static void intel_pmu_cpu_dying(int cpu)
{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct intel_percore *pc = cpuc->per_core;
+
+ if (pc) {
+ if (pc->core_id == -1 || --pc->refcnt == 0)
+ kfree(pc);
+ cpuc->per_core = NULL;
+ }
+
fini_debug_store_on_cpu(cpu);
}
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
+ .cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
};
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_core();
x86_pmu.event_constraints = intel_core2_event_constraints;
+ x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
pr_cont("Core2 events, ");
break;
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void)
case 46: /* 45 nm nehalem-ex, "Beckton" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+ x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_nehalem_extra_regs;
pr_cont("Nehalem events, ");
break;
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_atom();
x86_pmu.event_constraints = intel_gen_event_constraints;
+ x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
pr_cont("Atom events, ");
break;
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void)
case 44: /* 32 nm nehalem, "Gulftown" */
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_westmere_event_constraints;
+ x86_pmu.percore_constraints = intel_westmere_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_westmere_extra_regs;
pr_cont("Westmere events, ");
break;
+ case 42: /* SandyBridge */
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ intel_pmu_lbr_init_nhm();
+
+ x86_pmu.event_constraints = intel_snb_event_constraints;
+ x86_pmu.pebs_constraints = intel_snb_pebs_events;
+ pr_cont("SandyBridge events, ");
+ break;
+
default:
/*
* default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f2b8a..b95c66ae4a2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void)
/*
* PEBS
*/
-
-static struct event_constraint intel_core_pebs_events[] = {
- PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
+static struct event_constraint intel_core2_pebs_event_constraints[] = {
+ PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
- PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
- PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
- PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_atom_pebs_event_constraints[] = {
+ PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+ PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};
-static struct event_constraint intel_nehalem_pebs_events[] = {
- PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
- PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
- PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
- PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
- PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
- PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
- PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
+static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+ INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_pebs_event_constraints[] = {
+ INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_snb_pebs_events[] = {
+ PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
+ PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
+ PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
+ PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
+ PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
+ PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+ PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+ PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
+ PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
+ PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
+ PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
+ PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+ PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+ PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+ PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
+ PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+ PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+ PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+ PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
+ PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
+ PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
EVENT_CONSTRAINT_END
};
@@ -695,20 +753,17 @@ static void intel_ds_init(void)
printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
- x86_pmu.pebs_constraints = intel_core_pebs_events;
break;
case 1:
printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
break;
default:
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
- break;
}
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f7a0993c1e7..3769ac822f9 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,15 +764,20 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
u64 v;
/* an official way for overflow indication */
- rdmsrl(hwc->config_base + hwc->idx, v);
+ rdmsrl(hwc->config_base, v);
if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
return 1;
}
- /* it might be unflagged overflow */
- rdmsrl(hwc->event_base + hwc->idx, v);
- if (!(v & ARCH_P4_CNTRVAL_MASK))
+ /*
+ * In some circumstances the overflow might issue an NMI but did
+ * not set P4_CCCR_OVF bit. Because a counter holds a negative value
+ * we simply check for high bit being set, if it's cleared it means
+ * the counter has reached zero value and continued counting before
+ * real NMI signal was received:
+ */
+ if (!(v & ARCH_P4_UNFLAGGED_BIT))
return 1;
return 0;
@@ -810,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(u64)(p4_config_unpack_cccr(hwc->config)) &
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -880,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
p4_pmu_enable_pebs(hwc->config);
(void)checking_wrmsrl(escr_addr, escr_conf);
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cd..20c097e3386 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}
static __initconst const struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a23661550..966512b2cac 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTR)
+ return (msr - MSR_F15H_PERF_CTR) >> 1;
return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTL)
+ return (msr - MSR_F15H_PERF_CTL) >> 1;
return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..220a1c11cfd 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- flags = oops_begin();
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- oops_end(flags, regs, 0);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
static int __init oops_setup(char *s)
{
if (!s)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd953de..9efbdcc5642 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -143,15 +143,10 @@ static void __init ati_bugs(int num, int slot, int func)
static u32 __init ati_sbx00_rev(int num, int slot, int func)
{
- u32 old, d;
+ u32 d;
- d = read_pci_config(num, slot, func, 0x70);
- old = d;
- d &= ~(1<<8);
- write_pci_config(num, slot, func, 0x70, d);
d = read_pci_config(num, slot, func, 0x8);
d &= 0xff;
- write_pci_config(num, slot, func, 0x70, old);
return d;
}
@@ -160,13 +155,16 @@ static void __init ati_bugs_contd(int num, int slot, int func)
{
u32 d, rev;
- if (acpi_use_timer_override)
- return;
-
rev = ati_sbx00_rev(num, slot, func);
+ if (rev >= 0x40)
+ acpi_fix_pin2_polarity = 1;
+
if (rev > 0x13)
return;
+ if (acpi_use_timer_override)
+ return;
+
/* check for IRQ0 interrupt swap */
d = read_pci_config(num, slot, func, 0x64);
if (!(d & (1<<14)))
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7eb..fa41f7298c8 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -65,6 +65,8 @@
#define sysexit_audit syscall_exit_work
#endif
+ .section .entry.text, "ax"
+
/*
* We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers:
@@ -395,7 +397,7 @@ sysenter_past_esp:
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
*/
.section .init.rodata,"a"
ENTRY(interrupt)
-.text
+.section .entry.text, "ax"
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif
.previous
.long 1b
- .text
+ .section .entry.text, "ax"
vector=vector+1
.endif
.endr
@@ -1409,8 +1411,7 @@ END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
RING0_EC_FRAME
- pushl $do_async_page_fault
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_async_page_fault
jmp error_code
CFI_ENDPROC
END(apf_page_fault)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c..c32cbbcff7b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -61,6 +61,8 @@
#define __AUDIT_ARCH_LE 0x40000000
.code64
+ .section .entry.text, "ax"
+
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
*/
.section .init.rodata,"a"
ENTRY(interrupt)
- .text
+ .section .entry.text
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif
.previous
.quad 1b
- .text
+ .section .entry.text
vector=vector+1
.endif
.endr
@@ -1248,7 +1250,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
-END(do_hypervisor_callback)
+END(xen_do_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 382eb2936d4..a93742a5746 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
return;
}
- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer) == -EBUSY) {
- *parent = old;
- return;
- }
-
trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
*parent = old;
+ return;
+ }
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer) == -EBUSY) {
+ *parent = old;
+ return;
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 767d6c43de3..187aa63b321 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
*/
KERNEL_PAGES = LOWMEM_PAGES
-INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
RESERVE_BRK(pagetables, INIT_MAP_SIZE)
/*
@@ -623,7 +623,7 @@ ENTRY(initial_code)
* BSS section
*/
__PAGE_ALIGNED_BSS
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
#ifdef CONFIG_X86_PAE
initial_pg_pmd:
.fill 1024*KPMDS,4,0
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir)
#ifdef CONFIG_X86_PAE
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(initial_page_table)
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
@@ -662,7 +662,7 @@ ENTRY(initial_page_table)
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
- .align PAGE_SIZE_asm /* needs to be page-sized too */
+ .align PAGE_SIZE /* needs to be page-sized too */
#endif
.data
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec59af..8c968974253 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,22 +14,9 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <linux/bitmap.h>
#include <asm/syscalls.h>
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base,
- unsigned int extent, int new_value)
-{
- unsigned int i;
-
- for (i = base; i < base + extent; i++) {
- if (new_value)
- __set_bit(i, bitmap);
- else
- __clear_bit(i, bitmap);
- }
-}
-
/*
* this changes the io permissions bitmap in the current task.
*/
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
*/
tss = &per_cpu(init_tss, get_cpu());
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+ if (turn_on)
+ bitmap_clear(t->io_bitmap_ptr, from, num);
+ else
+ bitmap_set(t->io_bitmap_ptr, from, num);
/*
* Search for a (possibly new) maximum. This is simple and stupid,
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a4130005028..7c64c420a9f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
}
return NOTIFY_DONE;
- case DIE_NMIWATCHDOG:
- if (atomic_read(&kgdb_active) != -1) {
- /* KGDB CPU roundup: */
- kgdb_nmicallback(raw_smp_processor_id(), regs);
- return NOTIFY_STOP;
- }
- /* Enter debugger: */
- break;
-
case DIE_DEBUG:
if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
if (user_mode(regs))
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index d91c477b3f6..c969fd9d156 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr)
if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
return 0;
+ /*
+ * Do not optimize in the entry code due to the unstable
+ * stack handling.
+ */
+ if ((paddr >= (unsigned long )__entry_text_start) &&
+ (paddr < (unsigned long )__entry_text_end))
+ return 0;
+
/* Check there is enough space for a relative jump. */
if (size - offset < RELATIVEJUMP_SIZE)
return 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff455419898..99fa3adf014 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,12 +110,9 @@ void show_regs_common(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk(KERN_CONT " ");
- printk(KERN_CONT "%s %s", vendor, product);
- if (board) {
- printk(KERN_CONT "/");
- printk(KERN_CONT "%s", board);
- }
+ printk(KERN_CONT " %s %s", vendor, product);
+ if (board)
+ printk(KERN_CONT "/%s", board);
printk(KERN_CONT "\n");
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index fc7aae1e2bc..715037caeb4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -285,6 +285,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
+ { /* Handle problems with rebooting on VersaLogic Menlow boards */
+ .callback = set_bios_reboot,
+ .ident = "VersaLogic Menlow based board",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 09d0172a005..16ce4261399 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -131,6 +131,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
@@ -356,23 +358,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- struct cpumask *llc = dst->llc_shared_map;
- *dst = *src;
- dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- *dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -382,7 +367,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- copy_cpuinfo_x86(c, &boot_cpu_data);
+ *c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -390,15 +375,12 @@ void __cpuinit smp_store_cpu_info(int id)
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
{
- struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
- struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
-
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, c2->llc_shared_map);
- cpumask_set_cpu(cpu2, c1->llc_shared_map);
+ cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
}
@@ -426,7 +408,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -437,8 +419,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -477,7 +459,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
- return c->llc_shared_map;
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -1098,13 +1080,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
- mb();
+
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
+ cpumask_copy(cpu_callin_mask, cpumask_of(0));
+ mb();
#ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id();
#endif
@@ -1112,7 +1094,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8..5f181742e8f 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,6 @@ ENTRY(sys_call_table)
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64 /* 340 */
+ .long sys_name_to_handle_at
+ .long sys_open_by_handle_at
+ .long sys_clock_adjtime
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bf470075518..0381e1f3bae 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@ SECTIONS
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
+ ENTRY_TEXT
IRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
@@ -305,7 +306,7 @@ SECTIONS
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU(THREAD_SIZE)
+ PERCPU(PAGE_SIZE)
#endif
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 54ce246a383..63fec1531e8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2777,6 +2777,8 @@ static int dr_interception(struct vcpu_svm *svm)
kvm_register_write(&svm->vcpu, reg, val);
}
+ skip_emulated_instruction(&svm->vcpu);
+
return 1;
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1357d7cf4ec..db932760ea8 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall,
TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
TP_STRUCT__entry(
- __field( __u16, code )
- __field( bool, fast )
__field( __u16, rep_cnt )
__field( __u16, rep_idx )
__field( __u64, ingpa )
__field( __u64, outgpa )
+ __field( __u16, code )
+ __field( bool, fast )
),
TP_fast_assign(
- __entry->code = code;
- __entry->fast = fast;
__entry->rep_cnt = rep_cnt;
__entry->rep_idx = rep_idx;
__entry->ingpa = ingpa;
__entry->outgpa = outgpa;
+ __entry->code = code;
+ __entry->fast = fast;
),
TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 2cda60a06e6..e8e7e0d06f4 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -15,14 +15,12 @@
/* if you want SMP support, implement these with real spinlocks */
.macro LOCK reg
- pushfl
- CFI_ADJUST_CFA_OFFSET 4
+ pushfl_cfi
cli
.endm
.macro UNLOCK reg
- popfl
- CFI_ADJUST_CFA_OFFSET -4
+ popfl_cfi
.endm
#define BEGIN(op) \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 71e080de335..391a083674b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -14,14 +14,12 @@
#include <asm/dwarf2.h>
.macro SAVE reg
- pushl %\reg
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm
.macro RESTORE reg
- popl %\reg
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %\reg
CFI_RESTORE \reg
.endm
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index adbccd0bbb7..78d16a554db 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
@@ -132,11 +130,9 @@ ENTRY(csum_partial)
jz 8f
roll $8, %eax
8:
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
ret
CFI_ENDPROC
@@ -148,11 +144,9 @@ ENDPROC(csum_partial)
ENTRY(csum_partial)
CFI_STARTPROC
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
@@ -260,11 +254,9 @@ ENTRY(csum_partial)
jz 90f
roll $8, %eax
90:
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
ret
CFI_ENDPROC
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) )
.previous
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ecx # equivalent to addl $4,%esp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
ret
CFI_ENDPROC
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 41fcf00e49d..67743977398 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -23,43 +23,50 @@
#include <asm/dwarf2.h>
#define save_common_regs \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rcx; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11
+ pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
+ pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
+ pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
+ pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
+ pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
+ pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
+ pushq_cfi %r11; CFI_REL_OFFSET r11, 0
#define restore_common_regs \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rcx; \
- popq %rsi; \
- popq %rdi
+ popq_cfi %r11; CFI_RESTORE r11; \
+ popq_cfi %r10; CFI_RESTORE r10; \
+ popq_cfi %r9; CFI_RESTORE r9; \
+ popq_cfi %r8; CFI_RESTORE r8; \
+ popq_cfi %rcx; CFI_RESTORE rcx; \
+ popq_cfi %rsi; CFI_RESTORE rsi; \
+ popq_cfi %rdi; CFI_RESTORE rdi
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
+ CFI_STARTPROC
save_common_regs
- pushq %rdx
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
movq %rax,%rdi
call rwsem_down_read_failed
- popq %rdx
+ popq_cfi %rdx
+ CFI_RESTORE rdx
restore_common_regs
ret
- ENDPROC(call_rwsem_down_read_failed)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
+ CFI_STARTPROC
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed
restore_common_regs
ret
- ENDPROC(call_rwsem_down_write_failed)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
+ CFI_STARTPROC
decl %edx /* do nothing if still outstanding active readers */
jnz 1f
save_common_regs
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake)
call rwsem_wake
restore_common_regs
1: ret
- ENDPROC(call_rwsem_wake)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
+ CFI_STARTPROC
save_common_regs
- pushq %rdx
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
movq %rax,%rdi
call rwsem_downgrade_wake
- popq %rdx
+ popq_cfi %rdx
+ CFI_RESTORE rdx
restore_common_regs
ret
- ENDPROC(call_rwsem_downgrade_wake)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe474178..06691daa410 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
*/
#ifdef CONFIG_SMP
ENTRY(__write_lock_failed)
- CFI_STARTPROC simple
+ CFI_STARTPROC
FRAME
2: LOCK_PREFIX
addl $ RW_LOCK_BIAS,(%eax)
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed)
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx,0
call rwsem_down_read_failed
- pop %edx
- CFI_ADJUST_CFA_OFFSET -4
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
calll rwsem_down_write_failed
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_write_failed)
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake)
CFI_STARTPROC
decw %dx /* do nothing if still outstanding active readers */
jnz 1f
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
call rwsem_wake
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
1: ret
CFI_ENDPROC
ENDPROC(call_rwsem_wake)
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx,0
call rwsem_downgrade_wake
- pop %edx
- CFI_ADJUST_CFA_OFFSET -4
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 650b11e00ec..2930ae05d77 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -7,24 +7,6 @@
#include <linux/linkage.h>
-#define ARCH_TRACE_IRQS_ON \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_on; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
-#define ARCH_TRACE_IRQS_OFF \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_off; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in eax (arg1) */
.macro thunk_ra name,func
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5a542..782b082c9ff 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -22,26 +22,6 @@
CFI_ENDPROC
.endm
- /* rdi: arg1 ... normal C conventions. rax is passed from C. */
- .macro thunk_retrax name,func
- .globl \name
-\name:
- CFI_STARTPROC
- SAVE_ARGS
- call \func
- jmp restore_norax
- CFI_ENDPROC
- .endm
-
-
- .section .sched.text, "ax"
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
- thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
- thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
- thunk rwsem_wake_thunk,rwsem_wake
- thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
-#endif
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in rdi (arg1) */
.macro thunk_ra name,func
@@ -72,10 +52,3 @@ restore:
RESTORE_ARGS
ret
CFI_ENDPROC
-
- CFI_STARTPROC
- SAVE_ARGS
-restore_norax:
- RESTORE_ARGS 1
- ret
- CFI_ENDPROC
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a..20e3f8702d1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
for (address = VMALLOC_START & PMD_MASK;
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
-
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
pmd_t *ret;
+ /* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
if (!ret)
break;
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, unsigned int fault)
{
if (fault & VM_FAULT_OOM) {
+ /* Kernel mode? Handle exceptions or die: */
+ if (!(error_code & PF_USER)) {
+ up_read(&current->mm->mmap_sem);
+ no_context(regs, error_code, address);
+ return;
+ }
+
out_of_memory(regs, error_code, address);
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af8..c14a5422e15 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
for (address = start; address <= end; address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
spinlock_t *pgt_lock;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ /* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(pgt_lock);
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eeb..1337c51b07d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -780,11 +780,7 @@ void __cpuinit numa_add_cpu(int cpu)
int physnid;
int nid = NUMA_NO_NODE;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
- if (nid == NUMA_NO_NODE)
- nid = early_cpu_to_node(cpu);
+ nid = early_cpu_to_node(cpu);
BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3..90825f2eb0f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
void update_page_count(int level, unsigned long pages)
{
- unsigned long flags;
-
/* Protect against CPA */
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
direct_pages_count[level] += pages;
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
if (cpa->force_split)
return 1;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}
out_unlock:
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return do_split;
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
- unsigned long flags, pfn, pfninc = 1;
+ unsigned long pfn, pfninc = 1;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -591,7 +589,7 @@ out_unlock:
*/
if (base)
__free_page(base);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96..0113d19c8aa 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
static void pgd_dtor(pgd_t *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
if (SHARED_KERNEL_PMD)
return;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- unsigned long flags;
pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
* respect to anything walking the pgd_list, so that they
* never see a partially populated pgd.
*/
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return pgd;
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e80..9260b3eb18d 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <asm/ce4100.h>
#include <asm/pci_x86.h>
struct sim_reg {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
.write = ce4100_conf_write,
};
-static int __init ce4100_pci_init(void)
+int __init ce4100_pci_init(void)
{
init_sim_regs();
raw_pci_ops = &ce4100_pci_conf;
- return 0;
+ /* Indicate caller that it should invoke pci_legacy_init() */
+ return 1;
}
-subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 2bdcc36e316..8c4085a95ef 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
#include <asm/xen/pci.h>
#ifdef CONFIG_ACPI
-static int xen_hvm_register_pirq(u32 gsi, int triggering)
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+ int trigger, int polarity)
{
int rc, irq;
struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return -1;
}
- if (triggering == ACPI_EDGE_SENSITIVE) {
+ if (trigger == ACPI_EDGE_SENSITIVE) {
shareable = 0;
name = "ioapic-edge";
} else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return irq;
}
-
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
- int trigger, int polarity)
-{
- return xen_hvm_register_pirq(gsi, trigger);
-}
#endif
#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, pirq, ret = 0;
+ int irq, pirq;
struct msi_desc *msidesc;
struct msi_msg msg;
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
__read_msi_msg(msidesc, &msg);
pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
- if (irq < 0)
+ if (msg.data != XEN_PIRQ_MSI_DATA ||
+ xen_irq_from_pirq(pirq) < 0) {
+ pirq = xen_allocate_pirq_msi(dev, msidesc);
+ if (pirq < 0)
goto error;
- ret = irq_set_msi_desc(irq, msidesc);
- if (ret < 0)
- goto error_while;
- printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
- " pirq=%d\n", irq, pirq);
- return 0;
+ xen_msi_compose_msg(dev, pirq, &msg);
+ __write_msi_msg(msidesc, &msg);
+ dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
+ } else {
+ dev_dbg(&dev->dev,
+ "xen: msi already bound to pirq=%d\n", pirq);
}
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
- if (irq < 0 || pirq < 0)
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (irq < 0)
goto error;
- printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
- xen_msi_compose_msg(dev, pirq, &msg);
- ret = irq_set_msi_desc(irq, msidesc);
- if (ret < 0)
- goto error_while;
- write_msi_msg(irq, &msg);
+ dev_dbg(&dev->dev,
+ "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
}
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
-
- return ret;
+ dev_err(&dev->dev,
+ "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ return -ENODEV;
}
/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return -ENOMEM;
if (type == PCI_CAP_ID_MSIX)
- ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+ ret = xen_pci_frontend_enable_msix(dev, v, nvec);
else
- ret = xen_pci_frontend_enable_msi(dev, &v);
+ ret = xen_pci_frontend_enable_msi(dev, v);
if (ret)
goto error;
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_allocate_pirq(v[i], 0, /* not sharable */
- (type == PCI_CAP_ID_MSIX) ?
- "pcifront-msi-x" : "pcifront-msi");
- if (irq < 0) {
- ret = -1;
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "pcifront-msi-x" :
+ "pcifront-msi");
+ if (irq < 0)
goto free;
- }
-
- ret = irq_set_msi_desc(irq, msidesc);
- if (ret)
- goto error_while;
i++;
}
kfree(v);
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
free:
kfree(v);
return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
+
+ /* Free the IRQ's and the msidesc using the generic code. */
+ default_teardown_msi_irqs(dev);
}
static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
xen_destroy_irq(irq);
}
+#ifdef CONFIG_XEN_DOM0
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, ret;
+ int ret = 0;
struct msi_desc *msidesc;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_create_msi_irq(dev, msidesc, type);
- if (irq < 0)
- return -1;
+ struct physdev_map_pirq map_irq;
- ret = irq_set_msi_desc(irq, msidesc);
- if (ret)
- goto error;
- }
- return 0;
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ map_irq.devfn = dev->devfn;
-error:
- xen_destroy_irq(irq);
+ if (type == PCI_CAP_ID_MSIX) {
+ int pos;
+ u32 table_offset, bir;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+
+ pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
+ &table_offset);
+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+
+ map_irq.table_base = pci_resource_start(dev, bir);
+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+ }
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (ret) {
+ dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+ goto out;
+ }
+
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
+ map_irq.pirq, map_irq.index,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+out:
return ret;
}
#endif
+#endif
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
int rc;
int share = 1;
+ u8 gsi;
- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-
- if (dev->irq < 0)
- return -EINVAL;
+ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+ rc);
+ return rc;
+ }
- if (dev->irq < NR_IRQS_LEGACY)
+ if (gsi < NR_IRQS_LEGACY)
share = 0;
- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ rc = xen_allocate_pirq(gsi, share, "pcifront");
if (rc < 0) {
- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
- dev->irq, rc);
+ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
+ gsi, rc);
return rc;
}
+
+ dev->irq = rc;
+ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
return 0;
}
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a717..cd6f184c3b3 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,6 +15,7 @@
#include <linux/serial_reg.h>
#include <linux/serial_8250.h>
+#include <asm/ce4100.h>
#include <asm/setup.h>
#include <asm/io.h>
@@ -129,4 +130,5 @@ void __init x86_ce4100_early_setup(void)
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+ x86_init.pci.init = ce4100_pci_init;
}
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab87464753..044bda5b317 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -140,8 +140,7 @@ void * __init prom_early_alloc(unsigned long size)
* wasted bootmem) and hand off chunks of it to callers.
*/
res = alloc_bootmem(chunk_size);
- if (!res)
- return NULL;
+ BUG_ON(!res);
prom_early_allocated += chunk_size;
memset(res, 0, chunk_size);
free_mem = chunk_size;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96..a7b38d35c29 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1;
/*
- * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
+ * base_dest_nodeid is the nasid of the first uvhub
* in the partition. The bit map will indicate uvhub numbers,
* which are 0-N in a partition. Pnodes are unique system-wide.
*/
- bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+ bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
bd2->header.dest_subnodeid = 0x10; /* the LB */
bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc..e4343fe488e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+config XEN_DEBUG
+ bool "Enable Xen debug checks"
+ depends on XEN
+ default n
+ help
+ Enable various WARN_ON checks in the Xen MMU code.
+ Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45f..49dbd78ec3c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
xen_setup_features();
- pv_info = xen_info;
- pv_info.kernel_rpl = 0;
+ pv_info.name = "Xen HVM";
xen_domain_type = XEN_HVM_DOMAIN;
return 0;
}
-void xen_hvm_init_shared_info(void)
+void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_have_vector_callback)
+ xen_init_lock_cpu(cpu);
break;
default:
break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
+ xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad57..832765c0fb8 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/seq_file.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- unsigned long mfn = pfn_to_mfn(pfn);
+ unsigned long mfn;
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ mfn = get_phys_to_machine(pfn);
+ else
+ mfn = pfn;
/*
* If there's no mfn for the pfn, then just create an
* empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
+ } else {
+ /*
+ * Paramount to do this test _after_ the
+ * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+ * IDENTITY_FRAME_BIT resolves to true.
+ */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ if (mfn & IDENTITY_FRAME_BIT) {
+ mfn &= ~IDENTITY_FRAME_BIT;
+ flags |= _PAGE_IOMAP;
+ }
}
-
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+ phys_addr_t addr = (pte & PTE_PFN_MASK);
+ phys_addr_t other_addr;
+ bool io_page = false;
+ pte_t _pte;
+
+ if (pte & _PAGE_IOMAP)
+ io_page = true;
+
+ _pte = xen_make_pte(pte);
+
+ if (!addr)
+ return _pte;
+
+ if (io_page &&
+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+ other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+ WARN(addr != other_addr,
+ "0x%lx is using VM_IO, but it is 0x%lx!\n",
+ (unsigned long)addr, (unsigned long)other_addr);
+ } else {
+ pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+ other_addr = (_pte.pte & PTE_PFN_MASK);
+ WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+ "0x%lx is missing VM_IO (and wasn't fixed)!\n",
+ (unsigned long)addr);
+ }
+
+ return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
*/
void xen_mm_pin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
*/
void xen_mm_unpin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
static __init void xen_post_allocator_init(void)
{
+#ifdef CONFIG_XEN_DEBUG
+ pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
in_frames[i] = virt_to_mfn(vaddr);
MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
- set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
if (out_frames)
out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff..215a3ce6106 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
* P2M_PER_PAGE depends on the architecture, as a mfn is always
* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
* 512 and 1024 entries respectively.
+ *
+ * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
+ *
+ * However not all entries are filled with MFNs. Specifically for all other
+ * leaf entries, or for the top root, or middle one, for which there is a void
+ * entry, we assume it is "missing". So (for example)
+ * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ *
+ * We also have the possibility of setting 1-1 mappings on certain regions, so
+ * that:
+ * pfn_to_mfn(0xc0000)=0xc0000
+ *
+ * The benefit of this is, that we can assume for non-RAM regions (think
+ * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
+ * get the PFN value to match the MFN.
+ *
+ * For this to work efficiently we have one new page p2m_identity and
+ * allocate (via reserved_brk) any other pages we need to cover the sides
+ * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
+ * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
+ * no other fancy value).
+ *
+ * On lookup we spot that the entry points to p2m_identity and return the
+ * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
+ * If the entry points to an allocated page, we just proceed as before and
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * appropriate functions (pfn_to_mfn).
+ *
+ * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
+ * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
+ * non-identity pfn. To protect ourselves against we elect to set (and get) the
+ * IDENTITY_FRAME_BIT on all identity mapped PFNs.
+ *
+ * This simplistic diagram is used to explain the more subtle piece of code.
+ * There is also a digram of the P2M at the end that can help.
+ * Imagine your E820 looking as so:
+ *
+ * 1GB 2GB
+ * /-------------------+---------\/----\ /----------\ /---+-----\
+ * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
+ * \-------------------+---------/\----/ \----------/ \---+-----/
+ * ^- 1029MB ^- 2001MB
+ *
+ * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
+ * 2048MB = 524288 (0x80000)]
+ *
+ * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
+ * is actually not present (would have to kick the balloon driver to put it in).
+ *
+ * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
+ * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
+ * of the PFN and the end PFN (263424 and 512256 respectively). The first step
+ * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
+ * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
+ * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
+ * to end pfn. We reserve_brk top leaf pages if they are missing (means they
+ * point to p2m_mid_missing).
+ *
+ * With the E820 example above, 263424 is not 1GB aligned so we allocate a
+ * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
+ * Each entry in the allocate page is "missing" (points to p2m_missing).
+ *
+ * Next stage is to determine if we need to do a more granular boundary check
+ * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
+ * We check if the start pfn and end pfn violate that boundary check, and if
+ * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
+ * granularity of setting which PFNs are missing and which ones are identity.
+ * In our example 263424 and 512256 both fail the check so we reserve_brk two
+ * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
+ * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
+ *
+ * At this point we would at minimum reserve_brk one page, but could be up to
+ * three. Each call to set_phys_range_identity has at maximum a three page
+ * cost. If we were to query the P2M at this stage, all those entries from
+ * start PFN through end PFN (so 1029MB -> 2001MB) would return
+ * INVALID_P2M_ENTRY ("missing").
+ *
+ * The next step is to walk from the start pfn to the end pfn setting
+ * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
+ * If we find that the middle leaf is pointing to p2m_missing we can swap it
+ * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
+ * point we do not need to worry about boundary aligment (so no need to
+ * reserve_brk a middle page, figure out which PFNs are "missing" and which
+ * ones are identity), as that has been done earlier. If we find that the
+ * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
+ * that page (which covers 512 PFNs) and set the appropriate PFN with
+ * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
+ * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
+ * IDENTITY_FRAME_BIT set.
+ *
+ * All other regions that are void (or not filled) either point to p2m_missing
+ * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
+ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
+ * contain the INVALID_P2M_ENTRY value and are considered "missing."
+ *
+ * This is what the p2m ends up looking (for the E820 above) with this
+ * fabulous drawing:
+ *
+ * p2m /--------------\
+ * /-----\ | &mfn_list[0],| /-----------------\
+ * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
+ * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
+ * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
+ * |-----| \ | [p2m_identity]+\\ | .... |
+ * | 2 |--\ \-------------------->| ... | \\ \----------------/
+ * |-----| \ \---------------/ \\
+ * | 3 |\ \ \\ p2m_identity
+ * |-----| \ \-------------------->/---------------\ /-----------------\
+ * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+ * \-----/ / | [p2m_identity]+-->| ..., ~0 |
+ * / /---------------\ | .... | \-----------------/
+ * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
+ * / | IDENTITY[@256]|<----/ \---------------/
+ * / | ~0, ~0, .... |
+ * | \---------------/
+ * |
+ * p2m_missing p2m_missing
+ * /------------------\ /------------\
+ * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
+ * | [p2m_mid_missing]+---->| ..., ~0 |
+ * \------------------/ \------------/
+ *
+ * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/
#include <linux/init.h>
@@ -30,6 +153,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#include <asm/cache.h>
#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+/* We might hit two boundary violations at the start and end, at max each
+ * boundary violation will require three middle nodes. */
+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
* - After resume we're called from within stop_machine, but the mfn
* tree should alreay be completely allocated.
*/
-void xen_build_mfn_list_list(void)
+void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
+ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_identity);
+
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /*
+ * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+ * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+ * would be wrong.
+ */
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return IDENTITY_FRAME(pfn);
+
return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
p2m_top_mfn_p[topidx] = mid_mfn;
}
- if (p2m_top[topidx][mididx] == p2m_missing) {
+ if (p2m_top[topidx][mididx] == p2m_identity ||
+ p2m_top[topidx][mididx] == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
+ unsigned long *p2m_orig = p2m_top[topidx][mididx];
p2m = alloc_p2m_page();
if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
p2m_init(p2m);
- if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+ if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else
mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
+bool __early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx, mididx, idx;
+
+ topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
+ idx = p2m_index(pfn);
+
+ /* Pfff.. No boundary cross-over, lets get out. */
+ if (!idx)
+ return false;
+
+ WARN(p2m_top[topidx][mididx] == p2m_identity,
+ "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+ topidx, mididx);
+
+ /*
+ * Could be done by xen_build_dynamic_phys_to_machine..
+ */
+ if (p2m_top[topidx][mididx] != p2m_missing)
+ return false;
+
+ /* Boundary cross-over for the edges: */
+ if (idx) {
+ unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_init(p2m);
+
+ p2m_top[topidx][mididx] = p2m;
+
+ }
+ return idx != 0;
+}
+unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e)
+{
+ unsigned long pfn;
+
+ if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+ return 0;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+ return pfn_e - pfn_s;
+
+ if (pfn_s > pfn_e)
+ return 0;
+
+ for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+ pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+ pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+ {
+ unsigned topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+
+ p2m_top[topidx] = mid;
+ }
+ }
+
+ __early_alloc_p2m(pfn_s);
+ __early_alloc_p2m(pfn_e);
+
+ for (pfn = pfn_s; pfn < pfn_e; pfn++)
+ if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+ break;
+
+ if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+ "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+ (pfn_e - pfn_s) - (pfn - pfn_s)))
+ printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+
+ return pfn - pfn_s;
+}
+
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return true;
+ }
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /* For sparse holes were the p2m leaf has real PFN along with
+ * PCI holes, stick in the PFN as the MFN value.
+ */
+ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return true;
+
+ /* Swap over from MISSING to IDENTITY if needed. */
+ if (p2m_top[topidx][mididx] == p2m_missing) {
+ WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+ p2m_identity) != p2m_missing);
+ return true;
+ }
+ }
+
if (p2m_top[topidx][mididx] == p2m_missing)
return mfn == INVALID_P2M_ENTRY;
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!alloc_p2m(pfn))
return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
{
unsigned long flags;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
unsigned long flags;
unsigned long mfn;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+int p2m_dump_show(struct seq_file *m, void *v)
+{
+ static const char * const level_name[] = { "top", "middle",
+ "entry", "abnormal" };
+ static const char * const type_name[] = { "identity", "missing",
+ "pfn", "abnormal"};
+#define TYPE_IDENTITY 0
+#define TYPE_MISSING 1
+#define TYPE_PFN 2
+#define TYPE_UNKNOWN 3
+ unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+ unsigned int uninitialized_var(prev_level);
+ unsigned int uninitialized_var(prev_type);
+
+ if (!p2m_top)
+ return 0;
+
+ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
+ unsigned idx = p2m_index(pfn);
+ unsigned lvl, type;
+
+ lvl = 4;
+ type = TYPE_UNKNOWN;
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ lvl = 0; type = TYPE_MISSING;
+ } else if (p2m_top[topidx] == NULL) {
+ lvl = 0; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == NULL) {
+ lvl = 1; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == p2m_identity) {
+ lvl = 1; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx] == p2m_missing) {
+ lvl = 1; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == 0) {
+ lvl = 2; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+ lvl = 2; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+ lvl = 2; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == pfn) {
+ lvl = 2; type = TYPE_PFN;
+ } else if (p2m_top[topidx][mididx][idx] != pfn) {
+ lvl = 2; type = TYPE_PFN;
+ }
+ if (pfn == 0) {
+ prev_level = lvl;
+ prev_type = type;
+ }
+ if (pfn == MAX_DOMAIN_PAGES-1) {
+ lvl = 3;
+ type = TYPE_UNKNOWN;
+ }
+ if (prev_type != type) {
+ seq_printf(m, " [0x%lx->0x%lx] %s\n",
+ prev_pfn_type, pfn, type_name[prev_type]);
+ prev_pfn_type = pfn;
+ prev_type = type;
+ }
+ if (prev_level != lvl) {
+ seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+ prev_pfn_level, pfn, level_name[prev_level]);
+ prev_pfn_level = pfn;
+ prev_level = lvl;
+ }
+ }
+ return 0;
+#undef TYPE_IDENTITY
+#undef TYPE_MISSING
+#undef TYPE_PFN
+#undef TYPE_UNKNOWN
+}
+#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a50d44..fa0269a9937 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
static __init void xen_add_extra_mem(unsigned long pages)
{
+ unsigned long pfn;
+
u64 size = (u64)pages * PAGE_SIZE;
u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
xen_extra_mem_size += size;
xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+ for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
start, end, ret);
if (ret == 1) {
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
len++;
}
}
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
return released;
}
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+ ssize_t map_size)
+{
+ phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+ phys_addr_t start_pci = last;
+ const struct e820entry *entry;
+ unsigned long identity = 0;
+ int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ phys_addr_t start = entry->addr;
+ phys_addr_t end = start + entry->size;
+
+ if (start < last)
+ start = last;
+
+ if (end <= start)
+ continue;
+
+ /* Skip over the 1MB region. */
+ if (last > end)
+ continue;
+
+ if (entry->type == E820_RAM) {
+ if (start > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(start));
+
+ /* Without saving 'last' we would gooble RAM too
+ * at the end of the loop. */
+ last = end;
+ start_pci = end;
+ continue;
+ }
+ start_pci = min(start, start_pci);
+ last = end;
+ }
+ if (last > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(last));
+ return identity;
+}
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
+ static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
struct xen_memory_map memmap;
unsigned long extra_pages = 0;
unsigned long extra_limit;
+ unsigned long identity_pages = 0;
int i;
int op;
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
+ memcpy(map_raw, map, sizeof(map));
e820.nr_map = 0;
xen_extra_mem_start = mem_end;
for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
end -= delta;
extra_pages += PFN_DOWN(delta);
+ /*
+ * Set RAM below 4GB that is not for us to be unusable.
+ * This prevents "System RAM" address space from being
+ * used as potential resource for I/O address (happens
+ * when 'allocate_resource' is called).
+ */
+ if (delta &&
+ (xen_initial_domain() && end < 0x100000000ULL))
+ e820_add_region(end, delta, E820_UNUSABLE);
}
if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(extra_pages);
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. We supply it with the non-sanitized version
+ * of the E820.
+ */
+ identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c795904..30612441ed9 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ WARN_ON(xen_smp_intr_init(0));
+
+ if (!xen_have_vector_callback)
+ return;
+ xen_init_lock_cpu(0);
+ xen_init_spinlocks();
+}
+
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+{
+ int rc;
+ rc = native_cpu_up(cpu);
+ WARN_ON (xen_smp_intr_init(cpu));
+ return rc;
+}
+
+static void xen_hvm_cpu_die(unsigned int cpu)
+{
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ native_cpu_die(cpu);
+}
+
+void __init xen_hvm_smp_init(void)
+{
+ smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+ smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
+ smp_ops.cpu_up = xen_hvm_cpu_up;
+ smp_ops.cpu_die = xen_hvm_cpu_die;
+ smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
+ smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
+}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a129b..45329c8c226 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
#include "xen-ops.h"
#include "mmu.h"
-void xen_pre_suspend(void)
+void xen_arch_pre_suspend(void)
{
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
BUG();
}
-void xen_hvm_post_suspend(int suspend_cancelled)
+void xen_arch_hvm_post_suspend(int suspend_cancelled)
{
+#ifdef CONFIG_XEN_PVHVM
int cpu;
xen_hvm_init_shared_info();
xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
xen_setup_runstate_info(cpu);
}
}
+#endif
}
-void xen_post_suspend(int suspend_cancelled)
+void xen_arch_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a..2e2d370a47b 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
+ IRQF_DISABLED|IRQF_PERCPU|
+ IRQF_NOBALANCING|IRQF_TIMER|
+ IRQF_FORCE_RESUME,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24e29c..aaa7291c925 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,9 @@ ENTRY(startup_xen)
__FINIT
.pushsection .text
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(hypercall_page)
- .skip PAGE_SIZE_asm
+ .skip PAGE_SIZE
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf98575..3112f55638c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void __init xen_hvm_smp_init(void);
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hvm_smp_init(void) {}
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS