author    David S. Miller <davem@davemloft.net>  2012-08-22 14:21:38 -0700
committer David S. Miller <davem@davemloft.net>  2012-08-22 14:21:38 -0700
commit    1304a7343b30fc4f16045412efdbb4179a3d9255 (patch)
tree      83d667ac4f62e30f70305ce4cc7e030e3465f92e /arch/x86
parent    1d76efe1577b4323609b1bcbfafa8b731eda071a (diff)
parent    23dcfa61bac244e1200ff9ad19c6e9144dcb6bb5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                               |   2
-rw-r--r--  arch/x86/Makefile                              |   4
-rw-r--r--  arch/x86/boot/Makefile                         |   2
-rw-r--r--  arch/x86/include/asm/mce.h                     |   8
-rw-r--r--  arch/x86/include/asm/olpc.h                    |  19
-rw-r--r--  arch/x86/include/asm/perf_event.h              |  11
-rw-r--r--  arch/x86/kernel/acpi/sleep.c                   |   4
-rw-r--r--  arch/x86/kernel/acpi/sleep.h                   |   2
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S               |   4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S               |   4
-rw-r--r--  arch/x86/kernel/alternative.c                  |   2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c                 |  14
-rw-r--r--  arch/x86/kernel/cpu/common.c                   |   2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c      |   7
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c               |  43
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               |  89
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               |  20
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c       |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         |  10
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c      |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  | 253
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h  |  48
-rw-r--r--  arch/x86/kernel/irq.c                          |   1
-rw-r--r--  arch/x86/kernel/kdebugfs.c                     |   6
-rw-r--r--  arch/x86/kvm/i8259.c                           |  17
-rw-r--r--  arch/x86/kvm/vmx.c                             |  20
-rw-r--r--  arch/x86/kvm/x86.c                             |   4
-rw-r--r--  arch/x86/mm/hugetlbpage.c                      |  21
-rw-r--r--  arch/x86/mm/pageattr.c                         |  10
-rw-r--r--  arch/x86/mm/srat.c                             |  15
-rw-r--r--  arch/x86/platform/efi/efi.c                    |  30
-rw-r--r--  arch/x86/platform/olpc/olpc-xo1-pm.c           |  16
-rw-r--r--  arch/x86/platform/olpc/olpc-xo1-sci.c          |   1
-rw-r--r--  arch/x86/platform/olpc/olpc-xo15-sci.c         |   1
-rw-r--r--  arch/x86/platform/olpc/olpc.c                  | 190
-rw-r--r--  arch/x86/realmode/rm/Makefile                  |   2
-rw-r--r--  arch/x86/syscalls/syscall_64.tbl               |   8
-rw-r--r--  arch/x86/xen/p2m.c                             |   5
38 files changed, 524 insertions, 382 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ba2657c4921..8ec3a1aa4ab 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1527,7 +1527,7 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
config CC_STACKPROTECTOR
- bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
+ bool "Enable -fstack-protector buffer overflow detection"
---help---
This option turns on the -fstack-protector GCC feature. This
feature puts, at the beginning of functions, a canary value on
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b0c5276861e..682e9c210ba 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,6 +27,10 @@ ifeq ($(CONFIG_X86_32),y)
KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
+ # Never want PIC in a 32-bit kernel, prevent breakage with GCC built
+ # with nonstandard options
+ KBUILD_CFLAGS += -fno-pic
+
# prevent gcc from keeping the stack 16 byte aligned
KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 5a747dd884d..f7535bedc33 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -57,7 +57,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/code16gcc.h \
- -fno-strict-aliasing -fomit-frame-pointer \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
$(call cc-option, -fno-unit-at-a-time)) \
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 441520e4174..a3ac52b29cb 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -33,6 +33,14 @@
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCACOD 0xffff /* MCA Error Code */
+
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
/* MCi_MISC register defines */
#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
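
As an aside (an illustration, not part of the patch): a minimal user-space sketch of how these masks pick apart an MCi_STATUS value. The constants mirror the definitions added above; the sample status word is invented.

#include <stdint.h>
#include <stdio.h>

#define MCACOD          0xffff  /* MCA error code field of MCi_STATUS */
#define MCACOD_SCRUB    0x00C0  /* 0xC0-0xCF memory scrubbing */
#define MCACOD_SCRUBMSK 0xfff0  /* mask that folds 0xC0-0xCF together */
#define MCACOD_INSTR    0x0150  /* instruction fetch */

int main(void)
{
    uint64_t status = 0xbd80000000000150ULL;  /* invented sample value */
    uint16_t code = status & MCACOD;

    if ((code & MCACOD_SCRUBMSK) == MCACOD_SCRUB)
        printf("memory scrubbing error\n");
    else if (code == MCACOD_INSTR)
        printf("instruction fetch error\n");
    else
        printf("other MCA error code 0x%04x\n", code);
    return 0;
}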
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 87bdbca72f9..72f9adf6eca 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -100,25 +100,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value);
extern int pci_olpc_init(void);
-/* EC related functions */
-
-extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
- unsigned char *outbuf, size_t outlen);
-
-/* EC commands */
-
-#define EC_FIRMWARE_REV 0x08
-#define EC_WRITE_SCI_MASK 0x1b
-#define EC_WAKE_UP_WLAN 0x24
-#define EC_WLAN_LEAVE_RESET 0x25
-#define EC_READ_EB_MODE 0x2a
-#define EC_SET_SCI_INHIBIT 0x32
-#define EC_SET_SCI_INHIBIT_RELEASE 0x34
-#define EC_WLAN_ENTER_RESET 0x35
-#define EC_WRITE_EXT_SCI_MASK 0x38
-#define EC_SCI_QUERY 0x84
-#define EC_EXT_SCI_QUERY 0x85
-
/* SCI source values */
#define EC_SCI_SRC_EMPTY 0x00
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dab39350e51..cb4e43bce98 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -196,11 +196,16 @@ static inline u32 get_ibs_caps(void) { return 0; }
extern void perf_events_lapic_init(void);
/*
- * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
- * This flag is otherwise unused and ABI specified to be 0, so nobody should
- * care what we do with it.
+ * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
+ * unused and ABI specified to be 0, so nobody should care what we do with
+ * them.
+ *
+ * EXACT - the IP points to the exact instruction that triggered the
+ * event (HW bugs exempt).
+ * VM - original X86_VM_MASK; see set_linear_ip().
*/
#define PERF_EFLAGS_EXACT (1UL << 3)
+#define PERF_EFLAGS_VM (1UL << 5)
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 95bf99de905..1b8e5a03d94 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags;
static char temp_stack[4096];
#endif
-asmlinkage void acpi_enter_s3(void)
-{
- acpi_enter_sleep_state(3, wake_sleep_flags);
-}
/**
* acpi_suspend_lowlevel - save kernel state
*
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 5653a5791ec..67f59f8c695 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,7 +2,6 @@
* Variables and functions used by the code in sleep.c
*/
-#include <linux/linkage.h>
#include <asm/realmode.h>
extern unsigned long saved_video_mode;
@@ -11,7 +10,6 @@ extern long saved_magic;
extern int wakeup_pmode_return;
extern u8 wake_sleep_flags;
-extern asmlinkage void acpi_enter_s3(void);
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 72610839f03..13ab720573e 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,7 +74,9 @@ restore_registers:
ENTRY(do_suspend_lowlevel)
call save_processor_state
call save_registers
- call acpi_enter_s3
+ pushl $3
+ call acpi_enter_sleep_state
+ addl $4, %esp
# In case of S3 failure, we'll emerge here. Jump
# to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 014d1d28c39..8ea5164cbd0 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel)
movq %rsi, saved_rsi
addq $8, %rsp
- call acpi_enter_s3
+ movl $3, %edi
+ xorl %eax, %eax
+ call acpi_enter_sleep_state
/* in case something went wrong, restore the machine status and go on */
jmp resume_point
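
For context (an illustration, not patch content): both stubs now open-code the C call below, each following its ABI — i386 cdecl pushes the argument on the stack and the caller pops it (the addl), while the x86-64 SysV ABI passes it in %edi and clears %eax before the call. The prototype is a sketch of the signature used here.

/* Illustrative C equivalent of the two assembly sequences above. */
extern unsigned int acpi_enter_sleep_state(unsigned char sleep_state);

void enter_s3(void)
{
    acpi_enter_sleep_state(3);  /* 3 == ACPI S3, suspend-to-RAM */
}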
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 931280ff829..afb7ff79a29 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -224,7 +224,7 @@ void __init arch_init_ideal_nops(void)
ideal_nops = intel_nops;
#endif
}
-
+ break;
default:
#ifdef CONFIG_X86_64
ideal_nops = k8_nops;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 406eee78468..c265593ec2c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1204,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
BUG_ON(!cfg->vector);
vector = cfg->vector;
- for_each_cpu(cpu, cfg->domain)
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
@@ -1212,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
if (likely(!cfg->move_in_progress))
return;
- for_each_cpu(cpu, cfg->old_domain) {
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1356,6 +1356,16 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
if (!IO_APIC_IRQ(irq))
return;
+ /*
+ * For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
+ * can handle this irq and the apic driver is finalized at this point,
+ * update the cfg->domain.
+ */
+ if (irq < legacy_pic->nr_legacy_irqs &&
+ cpumask_equal(cfg->domain, cpumask_of(0)))
+ apic->vector_allocation_domain(0, cfg->domain,
+ apic->target_cpus());
+
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
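
Illustration (not part of the patch): the two hunks above switch from walking the whole domain mask to walking its intersection with cpu_online_mask, so the per-cpu vector tables of offline CPUs are never touched. A toy user-space analogue of that intersection walk, with invented masks:

#include <stdio.h>

#define NR_CPUS 8

int main(void)
{
    unsigned int domain = 0x0f;  /* CPUs 0-3 are in the irq's domain */
    unsigned int online = 0x0d;  /* CPU 1 is offline */

    /* for_each_cpu_and(cpu, domain, online): visit bits set in both */
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        if (domain & online & (1u << cpu))
            printf("clear vector_irq on cpu %d\n", cpu);
    return 0;
}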
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 46d8786d655..a5fbc3c5fcc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -144,6 +144,8 @@ static int __init x86_xsave_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ setup_clear_cpu_cap(X86_FEATURE_AVX2);
return 1;
}
__setup("noxsave", x86_xsave_setup);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 413c2ced887..13017626f9a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -55,13 +55,6 @@ static struct severity {
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
-#define MCACOD 0xffff
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK 0xfff0
-#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
-#define MCACOD_DATA 0x0134 /* Data Load */
-#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
MCESEV(
NO, "Invalid",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5e095f873e3..292d0258311 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -103,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
static DEFINE_PER_CPU(struct work_struct, mce_work);
+static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -650,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+ struct pt_regs *regs)
{
int i, ret = 0;
for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
- if (m->status & MCI_STATUS_VAL)
+ if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
+ if (quirk_no_way_out)
+ quirk_no_way_out(i, m, regs);
+ }
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
@@ -1040,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*final = m;
memset(valid_banks, 0, sizeof(valid_banks));
- no_way_out = mce_no_way_out(&m, &msg, valid_banks);
+ no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
barrier();
@@ -1418,6 +1424,34 @@ static void __mcheck_cpu_init_generic(void)
}
}
+/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+ if (bank != 0)
+ return;
+ if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+ return;
+ if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+ MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+ MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+ MCACOD)) !=
+ (MCI_STATUS_UC|MCI_STATUS_EN|
+ MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+ MCI_STATUS_AR|MCACOD_INSTR))
+ return;
+
+ m->mcgstatus |= MCG_STATUS_EIPV;
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+}
+
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
@@ -1515,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
mce_bootlog = 0;
+
+ if (c->x86 == 6 && c->x86_model == 45)
+ quirk_no_way_out = quirk_sandybridge_ifu;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 29557aa06dd..915b876edd1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
#include <asm/smp.h>
#include <asm/alternative.h>
#include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
#include "perf_event.h"
@@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size)
return (__range_not_ok(fp, size, TASK_SIZE) == 0);
}
+static unsigned long get_segment_base(unsigned int segment)
+{
+ struct desc_struct *desc;
+ int idx = segment >> 3;
+
+ if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+ if (idx > LDT_ENTRIES)
+ return 0;
+
+ if (idx > current->active_mm->context.size)
+ return 0;
+
+ desc = current->active_mm->context.ldt;
+ } else {
+ if (idx > GDT_ENTRIES)
+ return 0;
+
+ desc = __this_cpu_ptr(&gdt_page.gdt[0]);
+ }
+
+ return get_desc_base(desc + idx);
+}
+
#ifdef CONFIG_COMPAT
#include <asm/compat.h>
@@ -1746,13 +1771,17 @@ static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
/* 32-bit process in 64-bit kernel. */
+ unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
const void __user *fp;
if (!test_thread_flag(TIF_IA32))
return 0;
- fp = compat_ptr(regs->bp);
+ cs_base = get_segment_base(regs->cs);
+ ss_base = get_segment_base(regs->ss);
+
+ fp = compat_ptr(ss_base + regs->bp);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = 0;
@@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (!valid_user_frame(fp, sizeof(frame)))
break;
- perf_callchain_store(entry, frame.return_address);
- fp = compat_ptr(frame.next_frame);
+ perf_callchain_store(entry, cs_base + frame.return_address);
+ fp = compat_ptr(ss_base + frame.next_frame);
}
return 1;
}
@@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
+ /*
+	 * We don't know what to do with VM86 stacks; ignore them for now.
+ */
+ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+ return;
+
fp = (void __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
@@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ * VM86 - the good olde 16 bit days, where the linear address is
+ * 20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ * IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ * to figure out what the 32bit base address is.
+ *
+ * X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
{
- unsigned long ip;
+ /*
+ * If we are in VM86 mode, add the segment offset to convert to a
+ * linear address.
+ */
+ if (regs->flags & X86_VM_MASK)
+ return 0x10 * regs->cs;
+
+ /*
+ * For IA32 we look at the GDT/LDT segment base to convert the
+ * effective IP to a linear address.
+ */
+#ifdef CONFIG_X86_32
+ if (user_mode(regs) && regs->cs != __USER_CS)
+ return get_segment_base(regs->cs);
+#else
+ if (test_thread_flag(TIF_IA32)) {
+ if (user_mode(regs) && regs->cs != __USER32_CS)
+ return get_segment_base(regs->cs);
+ }
+#endif
+ return 0;
+}
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- ip = perf_guest_cbs->get_guest_ip();
- else
- ip = instruction_pointer(regs);
+ return perf_guest_cbs->get_guest_ip();
- return ip;
+ return regs->ip + code_segment_base(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
@@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
- if (!kernel_ip(regs->ip))
+ if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
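
To make the fixup above concrete, here is a toy model (not kernel code) of what perf_instruction_pointer() now computes: a linear address is the code segment base plus the effective IP, where the base is 0x10 * CS in vm86 mode, the GDT/LDT descriptor base for non-flat IA32 segments, and 0 on x86-64. The bases below are invented; the kernel reads them via get_segment_base().

#include <stdio.h>

static unsigned long code_segment_base_toy(int vm86, unsigned long cs,
                                           unsigned long desc_base)
{
    if (vm86)
        return 0x10 * cs;  /* real-mode style segmentation */
    return desc_base;      /* descriptor base; 0 on x86-64 */
}

int main(void)
{
    unsigned long ip = 0x1234;  /* effective instruction pointer */

    printf("vm86:   %#lx\n", ip + code_segment_base_toy(1, 0x2000, 0));
    printf("ia32:   %#lx\n", ip + code_segment_base_toy(0, 0, 0x10000));
    printf("x86-64: %#lx\n", ip + code_segment_base_toy(0, 0, 0));
    return 0;
}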
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 821d53b696d..6605a81ba33 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip)
#endif
}
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+ regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+ if (regs->flags & X86_VM_MASK)
+ regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+ regs->ip = ip;
+}
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
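
A standalone sketch (not kernel code) of the XOR trick in set_linear_ip(): when EFLAGS.VM (bit 17) is set, a single XOR clears it and parks the information in the otherwise ABI-zero bit 5, where perf_callchain_user() can still see it. The bit definitions are copied from the patch.

#include <stdio.h>

#define X86_VM_MASK    (1UL << 17)  /* EFLAGS.VM */
#define PERF_EFLAGS_VM (1UL << 5)   /* ABI-zero bit reused by perf */

int main(void)
{
    unsigned long flags = X86_VM_MASK;  /* sample taken in vm86 mode */

    /* Exactly one of the two bits is set here, so one XOR clears
     * bit 17 and sets bit 5. */
    if (flags & X86_VM_MASK)
        flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);

    printf("bit17=%d bit5=%d\n",
           !!(flags & X86_VM_MASK), !!(flags & PERF_EFLAGS_VM));
    return 0;
}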
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index da9bcdcd985..7bfb5bec863 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -13,6 +13,8 @@
#include <asm/apic.h>
+#include "perf_event.h"
+
static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
- instruction_pointer_set(&regs, ibs_data.regs[1]);
+ set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 382366977d4..7f2739e03e7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1522,8 +1522,16 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+ /*
+ * If PMU counter has PEBS enabled it is not enough to disable counter
+ * on a guest entry since PEBS memory write can overshoot guest entry
+ * and corrupt guest memory. Disabling PEBS solves the problem.
+ */
+ arr[1].msr = MSR_IA32_PEBS_ENABLE;
+ arr[1].host = cpuc->pebs_enabled;
+ arr[1].guest = 0;
- *nr = 1;
+ *nr = 2;
return arr;
}
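
For orientation (an illustration; the struct is copied in shape only): each entry returned by intel_guest_get_msrs() names an MSR plus the value it must hold on the host side and inside the guest. The new entry loads MSR_IA32_PEBS_ENABLE with 0 for the guest, so a late PEBS write can never land in guest memory.

#include <stdio.h>

struct perf_guest_switch_msr {
    unsigned int msr;
    unsigned long long host, guest;
};

#define MSR_IA32_PEBS_ENABLE 0x3f1

int main(void)
{
    unsigned long long pebs_enabled = 0x1;  /* pretend counter 0 uses PEBS */
    struct perf_guest_switch_msr entry = {
        .msr   = MSR_IA32_PEBS_ENABLE,
        .host  = pebs_enabled,  /* restore on VM-exit */
        .guest = 0,             /* PEBS fully off while in the guest */
    };

    printf("MSR %#x: host=%#llx guest=%#llx\n",
           entry.msr, entry.host, entry.guest);
    return 0;
}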
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 629ae0b7ad9..e38d97bf425 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
* We sampled a branch insn, rewind using the LBR stack
*/
if (ip == to) {
- regs->ip = from;
+ set_linear_ip(regs, from);
return 1;
}
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
} while (to < ip);
if (to == ip) {
- regs->ip = old_to;
+ set_linear_ip(regs, old_to);
return 1;
}
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
*/
regs = *iregs;
- regs.ip = pebs->ip;
+ regs.flags = pebs->flags;
+ set_linear_ip(&regs, pebs->ip);
regs.bp = pebs->bp;
regs.sp = pebs->sp;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 7563fda9f03..0a5571080e7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -796,7 +796,6 @@ static struct intel_uncore_type *nhm_msr_uncores[] = {
DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
-DEFINE_UNCORE_FORMAT_ATTR(mm_cfg, mm_cfg, "config:63");
DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
@@ -902,16 +901,21 @@ static struct attribute_group nhmex_uncore_cbox_format_group = {
.attrs = nhmex_uncore_cbox_formats_attr,
};
+/* msr offset for each instance of cbox */
+static unsigned nhmex_cbox_msr_offsets[] = {
+ 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
+};
+
static struct intel_uncore_type nhmex_uncore_cbox = {
.name = "cbox",
.num_counters = 6,
- .num_boxes = 8,
+ .num_boxes = 10,
.perf_ctr_bits = 48,
.event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
.perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
.event_mask = NHMEX_PMON_RAW_EVENT_MASK,
.box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
- .msr_offset = NHMEX_C_MSR_OFFSET,
+ .msr_offsets = nhmex_cbox_msr_offsets,
.pair_ctr_ctl = 1,
.ops = &nhmex_uncore_ops,
.format_group = &nhmex_uncore_cbox_format_group
@@ -1032,24 +1036,22 @@ static struct intel_uncore_type nhmex_uncore_bbox = {
static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
{
- struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
- struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- if (event->attr.config & NHMEX_S_PMON_MM_CFG_EN) {
- reg1->config = event->attr.config1;
- reg2->config = event->attr.config2;
- } else {
- reg1->config = ~0ULL;
- reg2->config = ~0ULL;
- }
+ /* only TO_R_PROG_EV event uses the match/mask register */
+ if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
+ NHMEX_S_EVENT_TO_R_PROG_EV)
+ return 0;
if (box->pmu->pmu_idx == 0)
reg1->reg = NHMEX_S0_MSR_MM_CFG;
else
reg1->reg = NHMEX_S1_MSR_MM_CFG;
-
reg1->idx = 0;
-
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
return 0;
}
@@ -1059,8 +1061,8 @@ static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct per
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- wrmsrl(reg1->reg, 0);
- if (reg1->config != ~0ULL || reg2->config != ~0ULL) {
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, 0);
wrmsrl(reg1->reg + 1, reg1->config);
wrmsrl(reg1->reg + 2, reg2->config);
wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
@@ -1074,7 +1076,6 @@ static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
&format_attr_edge.attr,
&format_attr_inv.attr,
&format_attr_thresh8.attr,
- &format_attr_mm_cfg.attr,
&format_attr_match.attr,
&format_attr_mask.attr,
NULL,
@@ -1142,6 +1143,9 @@ static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
EVENT_EXTRA_END
};
+/* Nehalem-EX or Westmere-EX ? */
+bool uncore_nhmex;
+
static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
{
struct intel_uncore_extra_reg *er;
@@ -1171,18 +1175,29 @@ static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64
return false;
/* mask of the shared fields */
- mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
raw_spin_lock_irqsave(&er->lock, flags);
/* add mask of the non-shared field if it's in use */
- if (__BITS_VALUE(atomic_read(&er->ref), idx, 8))
- mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
+ if (uncore_nhmex)
+ mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ }
if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
atomic_add(1 << (idx * 8), &er->ref);
- mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
- NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (uncore_nhmex)
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
er->config &= ~mask;
er->config |= (config & mask);
ret = true;
@@ -1216,7 +1231,10 @@ u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
/* get the non-shared control bits and shift them */
idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
- config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (uncore_nhmex)
+ config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ else
+ config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
if (new_idx > orig_idx) {
idx = new_idx - orig_idx;
config <<= 3 * idx;
@@ -1226,6 +1244,10 @@ u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
}
/* add the shared control bits back */
+ if (uncore_nhmex)
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ else
+ config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
if (modify) {
/* adjust the main event selector */
@@ -1264,7 +1286,8 @@ again:
}
/* for the match/mask registers */
- if ((uncore_box_is_fake(box) || !reg2->alloc) &&
+ if (reg2->idx != EXTRA_REG_NONE &&
+ (uncore_box_is_fake(box) || !reg2->alloc) &&
!nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
goto fail;
@@ -1278,7 +1301,8 @@ again:
if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
nhmex_mbox_alter_er(event, idx[0], true);
reg1->alloc |= alloc;
- reg2->alloc = 1;
+ if (reg2->idx != EXTRA_REG_NONE)
+ reg2->alloc = 1;
}
return NULL;
fail:
@@ -1342,9 +1366,6 @@ static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event
struct extra_reg *er;
unsigned msr;
int reg_idx = 0;
-
- if (WARN_ON_ONCE(reg1->idx != -1))
- return -EINVAL;
/*
* The mbox events may require 2 extra MSRs at the most. But only
* the lower 32 bits in these MSRs are significant, so we can use
@@ -1355,11 +1376,6 @@ static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
- if (er->idx == __BITS_VALUE(reg1->idx, 0, 8) ||
- er->idx == __BITS_VALUE(reg1->idx, 1, 8))
- continue;
- if (WARN_ON_ONCE(reg_idx >= 2))
- return -EINVAL;
msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
@@ -1368,6 +1384,8 @@ static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event
/* always use the 32~63 bits to pass the PLD config */
if (er->idx == EXTRA_REG_NHMEX_M_PLD)
reg_idx = 1;
+ else if (WARN_ON_ONCE(reg_idx > 0))
+ return -EINVAL;
reg1->idx &= ~(0xff << (reg_idx * 8));
reg1->reg &= ~(0xffff << (reg_idx * 16));
@@ -1376,17 +1394,21 @@ static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event
reg1->config = event->attr.config1;
reg_idx++;
}
- /* use config2 to pass the filter config */
- reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
- if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
- reg2->config = event->attr.config2;
- else
- reg2->config = ~0ULL;
- if (box->pmu->pmu_idx == 0)
- reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
- else
- reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
-
+ /*
+ * The mbox only provides ability to perform address matching
+ * for the PLD events.
+ */
+ if (reg_idx == 2) {
+ reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+ if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+ reg2->config = event->attr.config2;
+ else
+ reg2->config = ~0ULL;
+ if (box->pmu->pmu_idx == 0)
+ reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+ else
+ reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+ }
return 0;
}
@@ -1422,34 +1444,36 @@ static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct per
wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
nhmex_mbox_shared_reg_config(box, idx));
- wrmsrl(reg2->reg, 0);
- if (reg2->config != ~0ULL) {
- wrmsrl(reg2->reg + 1,
- reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
- wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
- (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
- wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ if (reg2->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg2->reg, 0);
+ if (reg2->config != ~0ULL) {
+ wrmsrl(reg2->reg + 1,
+ reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+ wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+ (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+ wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ }
}
wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
}
-DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
-DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
-DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
-DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
-DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cfg, filter_cfg, "config2:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
-DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en, filter_cfg_en, "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
&format_attr_count_mode.attr,
@@ -1458,7 +1482,7 @@ static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
&format_attr_flag_mode.attr,
&format_attr_inc_sel.attr,
&format_attr_set_flag_sel.attr,
- &format_attr_filter_cfg.attr,
+ &format_attr_filter_cfg_en.attr,
&format_attr_filter_match.attr,
&format_attr_filter_mask.attr,
&format_attr_dsp.attr,
@@ -1482,6 +1506,12 @@ static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
{ /* end: all zeroes */ },
};
+static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
+ { /* end: all zeroes */ },
+};
+
static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
NHMEX_UNCORE_OPS_COMMON_INIT(),
.enable_event = nhmex_mbox_msr_enable_event,
@@ -1513,7 +1543,7 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
int port;
- /* adjust the main event selector */
+ /* adjust the main event selector and extra register index */
if (reg1->idx % 2) {
reg1->idx--;
hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
@@ -1522,29 +1552,17 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
}
- /* adjust address or config of extra register */
+ /* adjust extra register config */
port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
switch (reg1->idx % 6) {
- case 0:
- reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port);
- break;
- case 1:
- reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port);
- break;
case 2:
- /* the 8~15 bits to the 0~7 bits */
+ /* shift the 8~15 bits to the 0~7 bits */
reg1->config >>= 8;
break;
case 3:
- /* the 0~7 bits to the 8~15 bits */
+ /* shift the 0~7 bits to the 8~15 bits */
reg1->config <<= 8;
break;
- case 4:
- reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port);
- break;
- case 5:
- reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port);
- break;
};
}
@@ -1671,7 +1689,7 @@ static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
- int port, idx;
+ int idx;
idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
@@ -1681,27 +1699,11 @@ static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event
reg1->idx = idx;
reg1->config = event->attr.config1;
- port = idx / 6 + box->pmu->pmu_idx * 4;
- idx %= 6;
- switch (idx) {
- case 0:
- reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port);
- break;
- case 1:
- reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port);
- break;
- case 2:
- case 3:
- reg1->reg = NHMEX_R_MSR_PORTN_QLX_CFG(port);
- break;
+ switch (idx % 6) {
case 4:
case 5:
- if (idx == 4)
- reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port);
- else
- reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port);
- reg2->config = event->attr.config2;
hwc->config |= event->attr.config & (~0ULL << 32);
+ reg2->config = event->attr.config2;
break;
};
return 0;
@@ -1727,28 +1729,34 @@ static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct per
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
- int idx, er_idx;
+ int idx, port;
- idx = reg1->idx % 6;
- er_idx = idx;
- if (er_idx > 2)
- er_idx--;
- er_idx += (reg1->idx / 6) * 5;
+ idx = reg1->idx;
+ port = idx / 6 + box->pmu->pmu_idx * 4;
- switch (idx) {
+ switch (idx % 6) {
case 0:
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+ break;
case 1:
- wrmsrl(reg1->reg, reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
break;
case 2:
case 3:
- wrmsrl(reg1->reg, nhmex_rbox_shared_reg_config(box, er_idx));
+ wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+ nhmex_rbox_shared_reg_config(box, 2 + (idx / 6) * 5));
break;
case 4:
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+ break;
case 5:
- wrmsrl(reg1->reg, reg1->config);
- wrmsrl(reg1->reg + 1, hwc->config >> 32);
- wrmsrl(reg1->reg + 2, reg2->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+ hwc->config >> 32);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+ wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
break;
};
@@ -1756,8 +1764,8 @@ static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct per
(hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
}
-DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
@@ -2303,6 +2311,7 @@ int uncore_pmu_event_init(struct perf_event *event)
event->hw.idx = -1;
event->hw.last_tag = ~0ULL;
event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
if (event->attr.config == UNCORE_FIXED_EVENT) {
/* no fixed counter */
@@ -2373,7 +2382,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
type->attr_groups[1] = NULL;
}
-static void uncore_types_exit(struct intel_uncore_type **types)
+static void __init uncore_types_exit(struct intel_uncore_type **types)
{
int i;
for (i = 0; types[i]; i++)
@@ -2814,7 +2823,13 @@ static int __init uncore_cpu_init(void)
snbep_uncore_cbox.num_boxes = max_cores;
msr_uncores = snbep_msr_uncores;
break;
- case 46:
+ case 46: /* Nehalem-EX */
+ uncore_nhmex = true;
+ case 47: /* Westmere-EX aka. Xeon E7 */
+ if (!uncore_nhmex)
+ nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
+ if (nhmex_uncore_cbox.num_boxes > max_cores)
+ nhmex_uncore_cbox.num_boxes = max_cores;
msr_uncores = nhmex_msr_uncores;
break;
default:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f3851892e07..5b81c1856aa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -5,7 +5,7 @@
#include "perf_event.h"
#define UNCORE_PMU_NAME_LEN 32
-#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC)
+#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
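
The LL suffix matters on 32-bit builds, where long is 32 bits and 60 * NSEC_PER_SEC wraps to a negative interval. A small demonstration (plain C, not kernel code; the truncation mimics a 32-bit long):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

int main(void)
{
    int32_t wrapped = (int32_t)(60ULL * NSEC_PER_SEC);  /* 32-bit result */
    long long fixed = 60LL * NSEC_PER_SEC;              /* the patched form */

    printf("32-bit arithmetic: %d ns (negative!)\n", wrapped);
    printf("64-bit arithmetic: %lld ns\n", fixed);
    return 0;
}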
@@ -230,6 +230,7 @@
#define NHMEX_S1_MSR_MASK 0xe5a
#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
+#define NHMEX_S_EVENT_TO_R_PROG_EV 0
/* NHM-EX Mbox */
#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
@@ -275,18 +276,12 @@
NHMEX_M_PMON_CTL_INC_SEL_MASK | \
NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
-
-#define NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK 0x1f
-#define NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK (0x7 << 5)
-#define NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK (0x7 << 8)
-#define NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR (1 << 23)
-#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK \
- (NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK | \
- NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK | \
- NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK | \
- NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n)))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n)))
+
/*
* use the 9~13 bits to select event If the 7th bit is not set,
* otherwise use the 19~21 bits to select event.
@@ -368,6 +363,7 @@ struct intel_uncore_type {
unsigned num_shared_regs:8;
unsigned single_fixed:1;
unsigned pair_ctr_ctl:1;
+ unsigned *msr_offsets;
struct event_constraint unconstrainted;
struct event_constraint *constraints;
struct intel_uncore_pmu *pmus;
@@ -485,29 +481,31 @@ unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
return idx * 8 + box->pmu->type->perf_ctr;
}
-static inline
-unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
+{
+ struct intel_uncore_pmu *pmu = box->pmu;
+ return pmu->type->msr_offsets ?
+ pmu->type->msr_offsets[pmu->pmu_idx] :
+ pmu->type->msr_offset * pmu->pmu_idx;
+}
+
+static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
{
if (!box->pmu->type->box_ctl)
return 0;
- return box->pmu->type->box_ctl +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
}
-static inline
-unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
{
if (!box->pmu->type->fixed_ctl)
return 0;
- return box->pmu->type->fixed_ctl +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
}
-static inline
-unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
{
- return box->pmu->type->fixed_ctr +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
}
static inline
@@ -515,7 +513,7 @@ unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
{
return box->pmu->type->event_ctl +
(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ uncore_msr_box_offset(box);
}
static inline
@@ -523,7 +521,7 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
{
return box->pmu->type->perf_ctr +
(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
- box->pmu->type->msr_offset * box->pmu->pmu_idx;
+ uncore_msr_box_offset(box);
}
static inline
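
Illustration (not patch content): with per-instance offsets, a box's register address becomes base + offsets[pmu_idx] instead of base + msr_offset * pmu_idx. Using the cbox table from this patch and a stand-in base value:

#include <stdio.h>

/* cbox offsets from the patch; the base MSR value is a stand-in. */
static const unsigned cbox_offsets[] = {
    0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
};

int main(void)
{
    unsigned event_ctl_base = 0xd10;  /* stand-in for NHMEX_C0_MSR_PMON_EV_SEL0 */

    for (int idx = 0; idx < 10; idx++)
        printf("cbox%d EV_SEL0 at MSR %#x\n",
               idx, event_ctl_base + cbox_offsets[idx]);
    return 0;
}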
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1f5f1d5d2a0..7ad683d7864 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -328,6 +328,7 @@ void fixup_irqs(void)
chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
+ __this_cpu_write(vector_irq[vector], -1);
}
}
#endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 1d5d31ea686..dc1404bf8e4 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
{
struct setup_data_node *node;
struct setup_data *data;
- int error = -ENOMEM;
+ int error;
struct dentry *d;
struct page *pg;
u64 pa_data;
@@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent)
while (pa_data) {
node = kmalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
+ if (!node) {
+ error = -ENOMEM;
goto err_dir;
+ }
pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
if (PageHighMem(pg)) {
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 1df8fb9e1d5..e498b18f010 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -316,6 +316,11 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
+ u8 edge_irr = s->irr & ~s->elcr;
+ int i;
+			bool found = false;
+ struct kvm_vcpu *vcpu;
+
s->init4 = val & 1;
s->last_irr = 0;
s->irr &= s->elcr;
@@ -333,6 +338,18 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
if (val & 0x08)
pr_pic_unimpl(
"level sensitive irq not supported");
+
+ kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
+ if (kvm_apic_accept_pic_intr(vcpu)) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
+ if (edge_irr & (1 << irq))
+ pic_clear_isr(s, irq);
} else if (val & 0x08) {
if (val & 0x04)
s->poll = 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39b60707e0..c00f03de1b7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1488,13 +1488,6 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
loadsegment(ds, vmx->host_state.ds_sel);
loadsegment(es, vmx->host_state.es_sel);
}
-#else
- /*
- * The sysexit path does not restore ds/es, so we must set them to
- * a reasonable value ourselves.
- */
- loadsegment(ds, __USER_DS);
- loadsegment(es, __USER_DS);
#endif
reload_tss();
#ifdef CONFIG_X86_64
@@ -6370,6 +6363,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+#ifndef CONFIG_X86_64
+ /*
+ * The sysexit path does not restore ds/es, so we must set them to
+ * a reasonable value ourselves.
+ *
+ * We can't defer this to vmx_load_host_state() since that function
+	 * may be executed in interrupt context, which saves and restores segments
+ * around it, nullifying its effect.
+ */
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#endif
+
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
| (1 << VCPU_EXREG_RFLAGS)
| (1 << VCPU_EXREG_CPL)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59b59508ff0..42bce48f692 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -925,6 +925,10 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
*/
getboottime(&boot);
+ if (kvm->arch.kvmclock_offset) {
+ struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
+ boot = timespec_sub(boot, ts);
+ }
wc.sec = boot.tv_sec;
wc.nsec = boot.tv_nsec;
wc.version = version;
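
Sketch of the arithmetic with invented numbers: kvmclock_offset is the per-VM shift between host and guest clocks, so the wall-clock base handed to the guest is host boot time minus that offset.

#include <stdio.h>

int main(void)
{
    long long boot_ns = 100LL * 1000000000;          /* host boot time: 100 s */
    long long kvmclock_offset = -30LL * 1000000000;  /* invented per-VM offset */

    /* boot = timespec_sub(boot, ns_to_timespec(kvmclock_offset)) */
    long long guest_base_ns = boot_ns - kvmclock_offset;

    printf("guest wall-clock base: %lld s\n",
           guest_base_ns / 1000000000LL);
    return 0;
}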
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f6679a7fb8c..b91e4851242 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
}
/*
- * search for a shareable pmd page for hugetlb.
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
*/
-static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+static pte_t *
+huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
@@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
struct vm_area_struct *svma;
unsigned long saddr;
pte_t *spte = NULL;
+ pte_t *pte;
if (!vma_shareable(vma, addr))
- return;
+ return (pte_t *)pmd_alloc(mm, pud, addr);
mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
@@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
put_page(virt_to_page(spte));
spin_unlock(&mm->page_table_lock);
out:
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
mutex_unlock(&mapping->i_mmap_mutex);
+ return pte;
}
/*
@@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
} else {
BUG_ON(sz != PMD_SIZE);
if (pud_none(*pud))
- huge_pmd_share(mm, addr, pud);
- pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ pte = huge_pmd_share(mm, addr, pud);
+ else
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
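
A loose user-space analogue (pthreads, not kernel code) of the invariant the new comment describes: the allocate step must happen inside the same critical section as the sharing check, otherwise a racing task can miss the shared entry or pick a stale one.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t i_mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static int shared_pmd_allocations;  /* how many tasks "allocated" */

static void *huge_pmd_share_toy(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&i_mmap_mutex);
    /* the sharing check and the "pmd_alloc" are atomic together */
    if (shared_pmd_allocations == 0)
        shared_pmd_allocations++;
    pthread_mutex_unlock(&i_mmap_mutex);
    return NULL;
}

int main(void)
{
    pthread_t a, b;

    pthread_create(&a, NULL, huge_pmd_share_toy, NULL);
    pthread_create(&b, NULL, huge_pmd_share_toy, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    printf("pmd populated exactly once: %s\n",
           shared_pmd_allocations == 1 ? "yes" : "no");
    return 0;
}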
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a9616..a718e0d2350 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,13 +919,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
/*
* On success we use clflush, when the CPU supports it to
- * avoid the wbindv. If the CPU does not support it, in the
- * error case, and during early boot (for EFI) we fall back
- * to cpa_flush_all (which uses wbinvd):
+	 * avoid the wbinvd. If the CPU does not support it and in the
+	 * error case we fall back to cpa_flush_all (which uses
+	 * wbinvd):
*/
- if (early_boot_irqs_disabled)
- __cpa_flush_all((void *)(long)cache);
- else if (!ret && cpu_has_clflush) {
+ if (!ret && cpu_has_clflush) {
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 4599c3e8bcb..4ddf497ca65 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -142,23 +142,23 @@ static inline int save_add_info(void) {return 0;}
#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
+int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
u64 start, end;
int node, pxm;
if (srat_disabled())
- return;
+ return -1;
if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
bad_srat();
- return;
+ return -1;
}
if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
- return;
+ return -1;
if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
- return;
+ return -1;
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
@@ -168,12 +168,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
bad_srat();
- return;
+ return -1;
}
if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
- return;
+ return -1;
}
node_set(node, numa_nodes_parsed);
@@ -181,6 +181,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
node, pxm,
(unsigned long long) start, (unsigned long long) end - 1);
+ return 0;
}
void __init acpi_numa_arch_fixup(void) {}
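
Illustration with invented stand-ins (the real caller lives in the ACPI NUMA table walker): returning int instead of void lets the caller tell accepted affinity entries from rejected ones.

#include <stdio.h>

/* Stand-in types and parser, not kernel code. */
struct srat_mem_affinity { unsigned long long base, length; unsigned flags; };
#define SRAT_MEM_ENABLED 0x1

static int memory_affinity_init(const struct srat_mem_affinity *ma)
{
    if (!(ma->flags & SRAT_MEM_ENABLED))
        return -1;  /* rejected, mirrors the new error returns */
    return 0;
}

int main(void)
{
    const struct srat_mem_affinity entries[] = {
        { 0x0,        0x80000000, SRAT_MEM_ENABLED },
        { 0x80000000, 0x80000000, 0 },  /* disabled entry */
    };
    int accepted = 0;

    for (int i = 0; i < 2; i++)
        if (memory_affinity_init(&entries[i]) == 0)
            accepted++;
    printf("%d of 2 SRAT memory entries accepted\n", accepted);
    return 0;
}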
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 2dc29f51e75..92660edaa1e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -234,7 +234,22 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
return status;
}
-static int efi_set_rtc_mmss(unsigned long nowtime)
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+ efi_time_cap_t *tc)
+{
+ unsigned long flags;
+ efi_status_t status;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ efi_call_phys_prelog();
+ status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+ virt_to_phys(tc));
+ efi_call_phys_epilog();
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
{
int real_seconds, real_minutes;
efi_status_t status;
@@ -263,7 +278,7 @@ static int efi_set_rtc_mmss(unsigned long nowtime)
return 0;
}
-static unsigned long efi_get_time(void)
+unsigned long efi_get_time(void)
{
efi_status_t status;
efi_time_t eft;
@@ -606,13 +621,18 @@ static int __init efi_runtime_init(void)
}
/*
* We will only need *early* access to the following
- * EFI runtime service before set_virtual_address_map
+ * two EFI runtime services before set_virtual_address_map
* is invoked.
*/
+ efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
efi_phys.set_virtual_address_map =
(efi_set_virtual_address_map_t *)
runtime->set_virtual_address_map;
-
+ /*
+	 * Make efi_get_time callable before entering
+	 * virtual mode.
+ */
+ efi.get_time = phys_efi_get_time;
early_iounmap(runtime, sizeof(efi_runtime_services_t));
return 0;
@@ -700,10 +720,12 @@ void __init efi_init(void)
efi_enabled = 0;
return;
}
+#ifdef CONFIG_X86_32
if (efi_native) {
x86_platform.get_wallclock = efi_get_time;
x86_platform.set_wallclock = efi_set_rtc_mmss;
}
+#endif
#if EFI_DEBUG
print_efi_memmap();
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index 0ce8616c88a..d75582d1aa5 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -18,6 +18,7 @@
#include <linux/pm.h>
#include <linux/mfd/core.h>
#include <linux/suspend.h>
+#include <linux/olpc-ec.h>
#include <asm/io.h>
#include <asm/olpc.h>
@@ -51,16 +52,11 @@ EXPORT_SYMBOL_GPL(olpc_xo1_pm_wakeup_clear);
static int xo1_power_state_enter(suspend_state_t pm_state)
{
unsigned long saved_sci_mask;
- int r;
/* Only STR is supported */
if (pm_state != PM_SUSPEND_MEM)
return -EINVAL;
- r = olpc_ec_cmd(EC_SET_SCI_INHIBIT, NULL, 0, NULL, 0);
- if (r)
- return r;
-
/*
* Save SCI mask (this gets lost since PM1_EN is used as a mask for
* wakeup events, which is not necessarily the same event set)
@@ -76,16 +72,6 @@ static int xo1_power_state_enter(suspend_state_t pm_state)
/* Restore SCI mask (using dword access to CS5536_PM1_EN) */
outl(saved_sci_mask, acpi_base + CS5536_PM1_STS);
- /* Tell the EC to stop inhibiting SCIs */
- olpc_ec_cmd(EC_SET_SCI_INHIBIT_RELEASE, NULL, 0, NULL, 0);
-
- /*
- * Tell the wireless module to restart USB communication.
- * Must be done twice.
- */
- olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
- olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
-
return 0;
}
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 04b8c73659c..63d4aa40956 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -23,6 +23,7 @@
#include <linux/power_supply.h>
#include <linux/suspend.h>
#include <linux/workqueue.h>
+#include <linux/olpc-ec.h>
#include <asm/io.h>
#include <asm/msr.h>
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 599be499fdf..2fdca25905a 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/power_supply.h>
+#include <linux/olpc-ec.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index a4bee53c2e5..27376081dde 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -14,14 +14,13 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/delay.h>
-#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/string.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/syscore_ops.h>
-#include <linux/debugfs.h>
#include <linux/mutex.h>
+#include <linux/olpc-ec.h>
#include <asm/geode.h>
#include <asm/setup.h>
@@ -31,17 +30,6 @@
struct olpc_platform_t olpc_platform_info;
EXPORT_SYMBOL_GPL(olpc_platform_info);
-static DEFINE_SPINLOCK(ec_lock);
-
-/* debugfs interface to EC commands */
-#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */
-#define EC_MAX_CMD_REPLY (8)
-
-static struct dentry *ec_debugfs_dir;
-static DEFINE_MUTEX(ec_debugfs_cmd_lock);
-static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY];
-static unsigned int ec_debugfs_resp_bytes;
-
/* EC event mask to be applied during suspend (defining wakeup sources). */
static u16 ec_wakeup_mask;
@@ -125,16 +113,13 @@ static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
* <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
* OpenFirmware's source is available, the EC's is not.
*/
-int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
- unsigned char *outbuf, size_t outlen)
+static int olpc_xo1_ec_cmd(u8 cmd, u8 *inbuf, size_t inlen, u8 *outbuf,
+ size_t outlen, void *arg)
{
- unsigned long flags;
int ret = -EIO;
int i;
int restarts = 0;
- spin_lock_irqsave(&ec_lock, flags);
-
/* Clear OBF */
for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
inb(0x68);
@@ -198,10 +183,8 @@ restart:
ret = 0;
err:
- spin_unlock_irqrestore(&ec_lock, flags);
return ret;
}
-EXPORT_SYMBOL_GPL(olpc_ec_cmd);
void olpc_ec_wakeup_set(u16 value)
{
@@ -280,96 +263,6 @@ int olpc_ec_sci_query(u16 *sci_value)
}
EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
-static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf,
- size_t size, loff_t *ppos)
-{
- int i, m;
- unsigned char ec_cmd[EC_MAX_CMD_ARGS];
- unsigned int ec_cmd_int[EC_MAX_CMD_ARGS];
- char cmdbuf[64];
- int ec_cmd_bytes;
-
- mutex_lock(&ec_debugfs_cmd_lock);
-
- size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size);
-
- m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0],
- &ec_debugfs_resp_bytes,
- &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3],
- &ec_cmd_int[4], &ec_cmd_int[5]);
- if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) {
- /* reset to prevent overflow on read */
- ec_debugfs_resp_bytes = 0;
-
- printk(KERN_DEBUG "olpc-ec: bad ec cmd: "
- "cmd:response-count [arg1 [arg2 ...]]\n");
- size = -EINVAL;
- goto out;
- }
-
- /* convert scanf'd ints to char */
- ec_cmd_bytes = m - 2;
- for (i = 0; i <= ec_cmd_bytes; i++)
- ec_cmd[i] = ec_cmd_int[i];
-
- printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args "
- "%02x %02x %02x %02x %02x, want %d returns\n",
- ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3],
- ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes);
-
- olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? NULL : &ec_cmd[1],
- ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes);
-
- printk(KERN_DEBUG "olpc-ec: response "
- "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n",
- ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2],
- ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5],
- ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes);
-
-out:
- mutex_unlock(&ec_debugfs_cmd_lock);
- return size;
-}
-
-static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf,
- size_t size, loff_t *ppos)
-{
- unsigned int i, r;
- char *rp;
- char respbuf[64];
-
- mutex_lock(&ec_debugfs_cmd_lock);
- rp = respbuf;
- rp += sprintf(rp, "%02x", ec_debugfs_resp[0]);
- for (i = 1; i < ec_debugfs_resp_bytes; i++)
- rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]);
- mutex_unlock(&ec_debugfs_cmd_lock);
- rp += sprintf(rp, "\n");
-
- r = rp - respbuf;
- return simple_read_from_buffer(buf, size, ppos, respbuf, r);
-}
-
-static const struct file_operations ec_debugfs_genops = {
- .write = ec_debugfs_cmd_write,
- .read = ec_debugfs_cmd_read,
-};
-
-static void setup_debugfs(void)
-{
- ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0);
- if (ec_debugfs_dir == ERR_PTR(-ENODEV))
- return;
-
- debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL,
- &ec_debugfs_genops);
-}
-
-static int olpc_ec_suspend(void)
-{
- return olpc_ec_mask_write(ec_wakeup_mask);
-}
-
static bool __init check_ofw_architecture(struct device_node *root)
{
const char *olpc_arch;
@@ -424,8 +317,59 @@ static int __init add_xo1_platform_devices(void)
return 0;
}
-static struct syscore_ops olpc_syscore_ops = {
- .suspend = olpc_ec_suspend,
+static int olpc_xo1_ec_probe(struct platform_device *pdev)
+{
+ /* get the EC revision */
+ olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
+ (unsigned char *) &olpc_platform_info.ecver, 1);
+
+ /* EC version 0x5f adds support for wide SCI mask */
+ if (olpc_platform_info.ecver >= 0x5f)
+ olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
+
+ pr_info("OLPC board revision %s%X (EC=%x)\n",
+ ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
+ olpc_platform_info.boardrev >> 4,
+ olpc_platform_info.ecver);
+
+ return 0;
+}
+
+static int olpc_xo1_ec_suspend(struct platform_device *pdev)
+{
+ olpc_ec_mask_write(ec_wakeup_mask);
+
+ /*
+ * Squelch SCIs while suspended. This is a fix for
+ * <http://dev.laptop.org/ticket/1835>.
+ */
+ return olpc_ec_cmd(EC_SET_SCI_INHIBIT, NULL, 0, NULL, 0);
+}
+
+static int olpc_xo1_ec_resume(struct platform_device *pdev)
+{
+ /* Tell the EC to stop inhibiting SCIs */
+ olpc_ec_cmd(EC_SET_SCI_INHIBIT_RELEASE, NULL, 0, NULL, 0);
+
+ /*
+ * Tell the wireless module to restart USB communication.
+ * Must be done twice.
+ */
+ olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
+ olpc_ec_cmd(EC_WAKE_UP_WLAN, NULL, 0, NULL, 0);
+
+ return 0;
+}
+
+static struct olpc_ec_driver ec_xo1_driver = {
+ .probe = olpc_xo1_ec_probe,
+ .suspend = olpc_xo1_ec_suspend,
+ .resume = olpc_xo1_ec_resume,
+ .ec_cmd = olpc_xo1_ec_cmd,
+};
+
+static struct olpc_ec_driver ec_xo1_5_driver = {
+ .probe = olpc_xo1_ec_probe,
+ .ec_cmd = olpc_xo1_ec_cmd,
};
static int __init olpc_init(void)
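[Editor's note] The two driver structures above differ deliberately: the XO-1.5's EC is managed through ACPI, so only probe and ec_cmd are wired up, while the XO-1 also needs the SCI-inhibit suspend/resume dance. These hooks get invoked once the generic driver binds to the "olpc-ec" platform device registered in olpc_init() below. A hypothetical sketch of that binding, reusing the ec_driver pointer from the sketch above and the legacy platform_driver PM callbacks of this era; the real olpc-ec.c may differ in detail:

static int olpc_ec_plat_probe(struct platform_device *pdev)
{
	/* Let the board-specific code query the EC version, etc. */
	return ec_driver->probe ? ec_driver->probe(pdev) : 0;
}

static int olpc_ec_plat_suspend(struct platform_device *pdev, pm_message_t state)
{
	return ec_driver->suspend ? ec_driver->suspend(pdev) : 0;
}

static int olpc_ec_plat_resume(struct platform_device *pdev)
{
	return ec_driver->resume ? ec_driver->resume(pdev) : 0;
}

static struct platform_driver olpc_ec_plat_driver = {
	.probe		= olpc_ec_plat_probe,
	.suspend	= olpc_ec_plat_suspend,
	.resume		= olpc_ec_plat_resume,
	.driver = {
		.name	= "olpc-ec",
	},
};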
@@ -435,16 +379,17 @@ static int __init olpc_init(void)
if (!olpc_ofw_present() || !platform_detect())
return 0;
- spin_lock_init(&ec_lock);
+ /* register the XO-1 and 1.5-specific EC handler */
+ if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */
+ olpc_ec_driver_register(&ec_xo1_driver, NULL);
+ else
+ olpc_ec_driver_register(&ec_xo1_5_driver, NULL);
+ platform_device_register_simple("olpc-ec", -1, NULL, 0);
/* assume B1 and above models always have a DCON */
if (olpc_board_at_least(olpc_board(0xb1)))
olpc_platform_info.flags |= OLPC_F_DCON;
- /* get the EC revision */
- olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
- (unsigned char *) &olpc_platform_info.ecver, 1);
-
#ifdef CONFIG_PCI_OLPC
/* If the VSA exists let it emulate PCI, if not emulate in kernel.
* XO-1 only. */
@@ -452,14 +397,6 @@ static int __init olpc_init(void)
!cs5535_has_vsa2())
x86_init.pci.arch_init = pci_olpc_init;
#endif
- /* EC version 0x5f adds support for wide SCI mask */
- if (olpc_platform_info.ecver >= 0x5f)
- olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI;
-
- printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
- ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
- olpc_platform_info.boardrev >> 4,
- olpc_platform_info.ecver);
if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
r = add_xo1_platform_devices();
@@ -467,9 +404,6 @@ static int __init olpc_init(void)
return r;
}
- register_syscore_ops(&olpc_syscore_ops);
- setup_debugfs();
-
return 0;
}
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index b2d534cab25..88692871823 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -72,7 +72,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \
-Wall -Wstrict-prototypes \
-march=i386 -mregparm=3 \
-include $(srctree)/$(src)/../../boot/code16gcc.h \
- -fno-strict-aliasing -fomit-frame-pointer \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
$(call cc-option, -ffreestanding) \
$(call cc-option, -fno-toplevel-reorder,\
$(call cc-option, -fno-unit-at-a-time)) \
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 51171aeff0d..a582bfed95b 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -60,8 +60,8 @@
51 common getsockname sys_getsockname
52 common getpeername sys_getpeername
53 common socketpair sys_socketpair
-54 common setsockopt sys_setsockopt
-55 common getsockopt sys_getsockopt
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
56 common clone stub_clone
57 common fork stub_fork
58 common vfork stub_vfork
@@ -318,7 +318,7 @@
309 common getcpu sys_getcpu
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
-312 64 kcmp sys_kcmp
+312 common kcmp sys_kcmp
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -353,3 +353,5 @@
538 x32 sendmmsg compat_sys_sendmmsg
539 x32 process_vm_readv compat_sys_process_vm_readv
540 x32 process_vm_writev compat_sys_process_vm_writev
+541 x32 setsockopt compat_sys_setsockopt
+542 x32 getsockopt compat_sys_getsockopt
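[Editor's note] The socket-option entries deserve a word: slots 54/55 switch from "common" to "64" and x32 callers are redirected to new compat shims at 541/542, because some option payloads embed native pointers whose size differs under x32. kcmp at 312 goes the other way; its arguments are all integers, so the native handler is x32-safe and the entry can become "common". A userland illustration of the pointer problem, assuming SO_ATTACH_FILTER as the example option (struct sock_fprog is from linux/filter.h):

/* Why x32 needs compat_sys_setsockopt(): the SO_ATTACH_FILTER payload
 * embeds a user pointer, so its layout depends on the ABI's pointer
 * size -- 4 bytes under x32, 8 bytes under native x86-64. */
#include <sys/socket.h>
#include <linux/filter.h>

static int attach_filter(int fd, struct sock_filter *insns, unsigned short count)
{
	struct sock_fprog prog = {
		.len	= count,
		.filter	= insns,	/* pointer: ABI-dependent size */
	};

	/* With entries 54/55 marked "64", an x32 caller reaches
	 * compat_sys_setsockopt() (slot 541), which knows this layout. */
	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
}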
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 64effdc6da9..b2e91d40a4c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -194,6 +194,11 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID
* boundary violation will require three middle nodes. */
RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+/* When we populate back during bootup, the number of pages can vary. The
+ * maximum we have seen is 395979, but that does not mean it can't be more.
+ * Some machines can even have 3GB I/O holes. So let's reserve enough
+ * for 4GB of I/O and E820 holes. */
+RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
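[Editor's note] A quick arithmetic check on the reservation added above, assuming 4 KiB pages and 64-bit p2m entries (so P2M_PER_PAGE = 512):

	PMD_SIZE * 4 = 2 MiB * 4		= 8 MiB of brk space
	8 MiB / 4 KiB				= 2048 p2m leaf pages
	2048 pages * 512 PFNs per page		= 1,048,576 PFNs
	1,048,576 PFNs * 4 KiB per PFN		= 4 GiB covered

So the reservation indeed spans the 4 GiB of I/O and E820 holes the comment aims for, and the observed worst case of 395979 populated-back pages (roughly 1.5 GiB) fits comfortably.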