summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c6
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c209
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/crash.c32
-rw-r--r--arch/x86/kernel/kvm.c20
-rw-r--r--arch/x86/kernel/kvmclock.c88
-rw-r--r--arch/x86/kernel/pvclock.c143
-rw-r--r--arch/x86/kernel/setup.c8
-rw-r--r--arch/x86/kernel/tboot.c78
-rw-r--r--arch/x86/kernel/vm86_32.c2
14 files changed, 346 insertions, 258 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e48cafcf92a..bacf4b0d91f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1706,3 +1706,9 @@ int __acpi_release_global_lock(unsigned int *lock)
} while (unlikely (val != old));
return old & 0x1;
}
+
+void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+ e820_add_region(addr, size, E820_ACPI);
+ update_e820();
+}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index a65829ac2b9..9c2aa89a11c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
#include <linux/hardirq.h>
#include <linux/delay.h>
+#include <asm/numachip/numachip.h>
#include <asm/numachip/numachip_csr.h>
#include <asm/smp.h>
#include <asm/apic.h>
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
return 0;
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+ x86_init.pci.arch_init = pci_numachip_init;
map_csrs();
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 6a05c1d327a..5b7d4fa5d3b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -24,8 +24,6 @@ struct mce_bank {
int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
-extern int mce_ser;
-
extern struct mce_bank *mce_banks;
#ifdef CONFIG_X86_MCE_INTEL
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 13017626f9a..beb1f1689e5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
continue;
if ((m->mcgstatus & s->mcgmask) != s->mcgres)
continue;
- if (s->ser == SER_REQUIRED && !mce_ser)
+ if (s->ser == SER_REQUIRED && !mca_cfg.ser)
continue;
- if (s->ser == NO_SER && mce_ser)
+ if (s->ser == NO_SER && mca_cfg.ser)
continue;
if (s->context && ctx != s->context)
continue;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 46cbf868969..80dbda84f1c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
-int mce_disabled __read_mostly;
-
#define SPINUNIT 100 /* 100ns */
atomic_t mce_entry;
DEFINE_PER_CPU(unsigned, mce_exception_count);
-/*
- * Tolerant levels:
- * 0: always panic on uncorrected errors, log corrected errors
- * 1: panic or SIGBUS on uncorrected errors, log corrected errors
- * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
- * 3: never panic or SIGBUS, log all errors (for testing only)
- */
-static int tolerant __read_mostly = 1;
-static int banks __read_mostly;
-static int rip_msr __read_mostly;
-static int mce_bootlog __read_mostly = -1;
-static int monarch_timeout __read_mostly = -1;
-static int mce_panic_timeout __read_mostly;
-static int mce_dont_log_ce __read_mostly;
-int mce_cmci_disabled __read_mostly;
-int mce_ignore_ce __read_mostly;
-int mce_ser __read_mostly;
-int mce_bios_cmci_threshold __read_mostly;
-
-struct mce_bank *mce_banks __read_mostly;
+struct mce_bank *mce_banks __read_mostly;
+
+struct mca_config mca_cfg __read_mostly = {
+ .bootlog = -1,
+ /*
+ * Tolerant levels:
+ * 0: always panic on uncorrected errors, log corrected errors
+ * 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
+ * 3: never panic or SIGBUS, log all errors (for testing only)
+ */
+ .tolerant = 1,
+ .monarch_timeout = -1
+};
/* User mode helper program triggered by machine check event */
static unsigned long mce_need_notify;
@@ -302,7 +294,7 @@ static void wait_for_panic(void)
while (timeout-- > 0)
udelay(1);
if (panic_timeout == 0)
- panic_timeout = mce_panic_timeout;
+ panic_timeout = mca_cfg.panic_timeout;
panic("Panicing machine check CPU died");
}
@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) {
if (panic_timeout == 0)
- panic_timeout = mce_panic_timeout;
+ panic_timeout = mca_cfg.panic_timeout;
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr)
{
unsigned bank = __this_cpu_read(injectm.bank);
- if (msr == rip_msr)
+ if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
return offsetof(struct mce, status);
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
m->cs |= 3;
}
/* Use accurate RIP reporting if available. */
- if (rip_msr)
- m->ip = mce_rdmsrl(rip_msr);
+ if (mca_cfg.rip_msr)
+ m->ip = mce_rdmsrl(mca_cfg.rip_msr);
}
}
@@ -513,7 +505,7 @@ static int mce_ring_add(unsigned long pfn)
int mce_available(struct cpuinfo_x86 *c)
{
- if (mce_disabled)
+ if (mca_cfg.disabled)
return 0;
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}
@@ -565,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i)
/*
* Mask the reported address by the reported granularity.
*/
- if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+ if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
u8 shift = MCI_MISC_ADDR_LSB(m->misc);
m->addr >>= shift;
m->addr <<= shift;
@@ -599,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL);
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -620,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* TBD do the same check for MCI_STATUS_EN here?
*/
if (!(flags & MCP_UC) &&
- (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
+ (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
mce_read_aux(&m, i);
@@ -631,7 +623,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
+ if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);
/*
@@ -658,14 +650,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
{
int i, ret = 0;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
- if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
+ if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
return ret;
@@ -696,11 +688,11 @@ static int mce_timed_out(u64 *t)
rmb();
if (atomic_read(&mce_paniced))
wait_for_panic();
- if (!monarch_timeout)
+ if (!mca_cfg.monarch_timeout)
goto out;
if ((s64)*t < SPINUNIT) {
/* CHECKME: Make panic default for 1 too? */
- if (tolerant < 1)
+ if (mca_cfg.tolerant < 1)
mce_panic("Timeout synchronizing machine check over CPUs",
NULL, NULL);
cpu_missing = 1;
@@ -750,7 +742,8 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
+ int severity = mce_severity(&per_cpu(mces_seen, cpu),
+ mca_cfg.tolerant,
&nmsg);
if (severity > global_worst) {
msg = nmsg;
@@ -764,7 +757,7 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
- if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
+ if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Fatal Machine check", m, msg);
/*
@@ -777,7 +770,7 @@ static void mce_reign(void)
* No machine check event found. Must be some external
* source or one CPU is hung. Panic.
*/
- if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
+ if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Machine check from unknown source", NULL, NULL);
/*
@@ -801,7 +794,7 @@ static int mce_start(int *no_way_out)
{
int order;
int cpus = num_online_cpus();
- u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+ u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
if (!timeout)
return -1;
@@ -865,7 +858,7 @@ static int mce_start(int *no_way_out)
static int mce_end(int order)
{
int ret = -1;
- u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+ u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
if (!timeout)
goto reset;
@@ -946,7 +939,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
@@ -1011,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
+ struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
int i;
int worst = 0;
@@ -1022,7 +1016,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int order;
/*
* If no_way_out gets set, there is no safe way to recover from this
- * MCE. If tolerant is cranked up, we'll try anyway.
+ * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
/*
@@ -1038,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
this_cpu_inc(mce_exception_count);
- if (!banks)
+ if (!cfg->banks)
goto out;
mce_gather_info(&m, regs);
@@ -1065,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* because the first one to see it will clear it.
*/
order = mce_start(&no_way_out);
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1084,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* Non uncorrected or non signaled errors are handled by
* machine_check_poll. Leave them alone, unless this panics.
*/
- if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
!no_way_out)
continue;
@@ -1093,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
add_taint(TAINT_MACHINE_CHECK);
- severity = mce_severity(&m, tolerant, NULL);
+ severity = mce_severity(&m, cfg->tolerant, NULL);
/*
* When machine check was for corrected handler don't touch,
@@ -1117,7 +1111,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* usable_address or mce_add_ring fails.
- * RED-PEN don't ignore overflow for tolerant == 0
+ * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
@@ -1149,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* issues we try to recover, or limit damage to the current
* process.
*/
- if (tolerant < 3) {
+ if (cfg->tolerant < 3) {
if (no_way_out)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
@@ -1377,11 +1371,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
+ u8 num_banks = mca_cfg.banks;
- mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
+ mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
- for (i = 0; i < banks; i++) {
+
+ for (i = 0; i < num_banks; i++) {
struct mce_bank *b = &mce_banks[i];
b->ctl = -1ULL;
@@ -1401,7 +1397,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- if (!banks)
+ if (!mca_cfg.banks)
pr_info("CPU supports %d MCE banks\n", b);
if (b > MAX_NR_BANKS) {
@@ -1411,8 +1407,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
}
/* Don't support asymmetric configurations today */
- WARN_ON(banks != 0 && b != banks);
- banks = b;
+ WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
+ mca_cfg.banks = b;
+
if (!mce_banks) {
int err = __mcheck_cpu_mce_banks_init();
@@ -1422,25 +1419,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
- rip_msr = MSR_IA32_MCG_EIP;
+ mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
- mce_ser = 1;
+ mca_cfg.ser = true;
return 0;
}
static void __mcheck_cpu_init_generic(void)
{
+ enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
int i;
+ if (!mca_cfg.bootlog)
+ m_fl = MCP_DONTLOG;
+
/*
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+ machine_check_poll(MCP_UC | m_fl, &all_banks);
set_in_cr4(X86_CR4_MCE);
@@ -1448,7 +1449,7 @@ static void __mcheck_cpu_init_generic(void)
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
@@ -1489,6 +1490,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
+ struct mca_config *cfg = &mca_cfg;
+
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("unknown CPU type - not enabling MCE support\n");
return -EOPNOTSUPP;
@@ -1496,7 +1499,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && banks > 4) {
+ if (c->x86 == 15 && cfg->banks > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1504,18 +1507,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
- if (c->x86 <= 17 && mce_bootlog < 0) {
+ if (c->x86 <= 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
*/
- mce_bootlog = 0;
+ cfg->bootlog = 0;
}
/*
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
- if (c->x86 == 6 && banks > 0)
+ if (c->x86 == 6 && cfg->banks > 0)
mce_banks[0].ctl = 0;
/*
@@ -1566,7 +1569,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
- if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
+ if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
mce_banks[0].init = 0;
/*
@@ -1574,23 +1577,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* synchronization with a one second timeout.
*/
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
- monarch_timeout < 0)
- monarch_timeout = USEC_PER_SEC;
+ cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
/*
* There are also broken BIOSes on some Pentium M and
* earlier systems:
*/
- if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
- mce_bootlog = 0;
+ if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
+ cfg->bootlog = 0;
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
}
- if (monarch_timeout < 0)
- monarch_timeout = 0;
- if (mce_bootlog != 0)
- mce_panic_timeout = 30;
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = 0;
+ if (cfg->bootlog != 0)
+ cfg->panic_timeout = 30;
return 0;
}
@@ -1635,7 +1638,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
__this_cpu_write(mce_next_interval, iv);
- if (mce_ignore_ce || !iv)
+ if (mca_cfg.ignore_ce || !iv)
return;
t->expires = round_jiffies(jiffies + iv);
@@ -1668,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
*/
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
- if (mce_disabled)
+ if (mca_cfg.disabled)
return;
if (__mcheck_cpu_ancient_init(c))
@@ -1678,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
- mce_disabled = 1;
+ mca_cfg.disabled = true;
return;
}
@@ -1951,6 +1954,8 @@ static struct miscdevice mce_chrdev_device = {
*/
static int __init mcheck_enable(char *str)
{
+ struct mca_config *cfg = &mca_cfg;
+
if (*str == 0) {
enable_p5_mce();
return 1;
@@ -1958,22 +1963,22 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
- mce_disabled = 1;
+ cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
- mce_cmci_disabled = 1;
+ cfg->cmci_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
- mce_dont_log_ce = 1;
+ cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
- mce_ignore_ce = 1;
+ cfg->ignore_ce = true;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
- mce_bootlog = (str[0] == 'b');
+ cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
- mce_bios_cmci_threshold = 1;
+ cfg->bios_cmci_threshold = true;
else if (isdigit(str[0])) {
- get_option(&str, &tolerant);
+ get_option(&str, &(cfg->tolerant));
if (*str == ',') {
++str;
- get_option(&str, &monarch_timeout);
+ get_option(&str, &(cfg->monarch_timeout));
}
} else {
pr_info("mce argument %s ignored. Please use /sys\n", str);
@@ -2002,7 +2007,7 @@ static int mce_disable_error_reporting(void)
{
int i;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2142,15 +2147,15 @@ static ssize_t set_ignore_ce(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
- if (mce_ignore_ce ^ !!new) {
+ if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
mce_timer_delete_all();
on_each_cpu(mce_disable_cmci, NULL, 1);
- mce_ignore_ce = 1;
+ mca_cfg.ignore_ce = true;
} else {
/* enable ce features */
- mce_ignore_ce = 0;
+ mca_cfg.ignore_ce = false;
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
@@ -2166,14 +2171,14 @@ static ssize_t set_cmci_disabled(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
- if (mce_cmci_disabled ^ !!new) {
+ if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
on_each_cpu(mce_disable_cmci, NULL, 1);
- mce_cmci_disabled = 1;
+ mca_cfg.cmci_disabled = true;
} else {
/* enable cmci */
- mce_cmci_disabled = 0;
+ mca_cfg.cmci_disabled = false;
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
@@ -2190,9 +2195,9 @@ static ssize_t store_int_with_restart(struct device *s,
}
static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
-static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
-static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
-static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
+static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
+static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
+static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
static struct dev_ext_attribute dev_attr_check_interval = {
__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2200,13 +2205,13 @@ static struct dev_ext_attribute dev_attr_check_interval = {
};
static struct dev_ext_attribute dev_attr_ignore_ce = {
- __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
- &mce_ignore_ce
+ __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
+ &mca_cfg.ignore_ce
};
static struct dev_ext_attribute dev_attr_cmci_disabled = {
- __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
- &mce_cmci_disabled
+ __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
+ &mca_cfg.cmci_disabled
};
static struct device_attribute *mce_device_attrs[] = {
@@ -2253,7 +2258,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
- for (j = 0; j < banks; j++) {
+ for (j = 0; j < mca_cfg.banks; j++) {
err = device_create_file(dev, &mce_banks[j].attr);
if (err)
goto error2;
@@ -2285,7 +2290,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);
- for (i = 0; i < banks; i++)
+ for (i = 0; i < mca_cfg.banks; i++)
device_remove_file(dev, &mce_banks[i].attr);
device_unregister(dev);
@@ -2304,7 +2309,7 @@ static void __cpuinit mce_disable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2322,7 +2327,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2375,7 +2380,7 @@ static __init void mce_init_banks(void)
{
int i;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
struct device_attribute *a = &b->attr;
@@ -2426,7 +2431,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
- mce_disabled = 1;
+ mca_cfg.disabled = true;
return 1;
}
__setup("nomce", mcheck_disable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 4f9a3cbfc4a..402c454fbff 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -53,7 +53,7 @@ static int cmci_supported(int *banks)
{
u64 cap;
- if (mce_cmci_disabled || mce_ignore_ce)
+ if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
return 0;
/*
@@ -200,7 +200,7 @@ static void cmci_discover(int banks)
continue;
}
- if (!mce_bios_cmci_threshold) {
+ if (!mca_cfg.bios_cmci_threshold) {
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= CMCI_THRESHOLD;
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
@@ -227,7 +227,7 @@ static void cmci_discover(int banks)
* set the thresholds properly or does not work with
* this boot option. Note down now and report later.
*/
- if (mce_bios_cmci_threshold && bios_zero_thresh &&
+ if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
bios_wrong_thresh = 1;
} else {
@@ -235,7 +235,7 @@ static void cmci_discover(int banks)
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
- if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+ if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
pr_info_once(
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index e4c1a418453..726bf963c22 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -606,7 +606,7 @@ void __init mtrr_bp_init(void)
/*
* This is an AMD specific MSR, but we assume(hope?) that
- * Intel will implement it to when they extend the address
+ * Intel will implement it too when they extend the address
* bus of the Xeon.
*/
if (cpuid_eax(0x80000000) >= 0x80000008) {
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 13ad89971d4..74467feb4dc 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
+#include <linux/module.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -30,6 +31,27 @@
int in_crash_kexec;
+/*
+ * This is used to VMCLEAR all VMCSs loaded on the
+ * processor. And when loading kvm_intel module, the
+ * callback function pointer will be assigned.
+ *
+ * protected by rcu.
+ */
+crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+ crash_vmclear_fn *do_vmclear_operation = NULL;
+
+ rcu_read_lock();
+ do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+ if (do_vmclear_operation)
+ do_vmclear_operation();
+ rcu_read_unlock();
+}
+
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#endif
crash_save_cpu(regs, cpu);
+ /*
+ * VMCLEAR VMCSs loaded on all cpus if needed.
+ */
+ cpu_crash_vmclear_loaded_vmcss();
+
/* Disable VMX or SVM if needed.
*
* We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
kdump_nmi_shootdown_cpus();
+ /*
+ * VMCLEAR VMCSs loaded on this cpu if needed.
+ */
+ cpu_crash_vmclear_loaded_vmcss();
+
/* Booting kdump kernel with VMX or SVM enabled won't work,
* because (among other limitations) we can't disable paging
* with the virt flags.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4180a874c76..08b973f6403 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -42,6 +42,7 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
+#include <asm/kvm_guest.h>
static int kvmapf = 1;
@@ -62,6 +63,15 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
+static int kvmclock_vsyscall = 1;
+static int parse_no_kvmclock_vsyscall(char *arg)
+{
+ kvmclock_vsyscall = 0;
+ return 0;
+}
+
+early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
+
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
@@ -110,11 +120,6 @@ void kvm_async_pf_task_wait(u32 token)
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e;
DEFINE_WAIT(wait);
- int cpu, idle;
-
- cpu = get_cpu();
- idle = idle_cpu(cpu);
- put_cpu();
spin_lock(&b->lock);
e = _find_apf_task(b, token);
@@ -128,7 +133,7 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.halted = idle || preempt_count() > 1;
+ n.halted = is_idle_task(current) || preempt_count() > 1;
init_waitqueue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
spin_unlock(&b->lock);
@@ -471,6 +476,9 @@ void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
+ if (kvmclock_vsyscall)
+ kvm_setup_vsyscall_timeinfo();
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f1b42b3a186..220a360010f 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -23,6 +23,7 @@
#include <asm/apic.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/memblock.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
/*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts;
int low, high;
+ int cpu;
low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high);
- vcpu_time = &get_cpu_var(hv_clock);
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ vcpu_time = &hv_clock[cpu].pvti;
pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
- put_cpu_var(hv_clock);
+
+ preempt_enable();
return ts.tv_sec;
}
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
+ int cpu;
preempt_disable_notrace();
- src = &__get_cpu_var(hv_clock);
+ cpu = smp_processor_id();
+ src = &hv_clock[cpu].pvti;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
static unsigned long kvm_get_tsc_khz(void)
{
struct pvclock_vcpu_time_info *src;
- src = &per_cpu(hv_clock, 0);
- return pvclock_tsc_khz(src);
+ int cpu;
+ unsigned long tsc_khz;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ src = &hv_clock[cpu].pvti;
+ tsc_khz = pvclock_tsc_khz(src);
+ preempt_enable();
+ return tsc_khz;
}
static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
{
bool ret = false;
struct pvclock_vcpu_time_info *src;
+ int cpu = smp_processor_id();
- src = &__get_cpu_var(hv_clock);
+ if (!hv_clock)
+ return ret;
+
+ src = &hv_clock[cpu].pvti;
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
- __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+ src->flags &= ~PVCLOCK_GUEST_STOPPED;
ret = true;
}
@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high, ret;
+ struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
- low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
- high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+ low = (int)__pa(src) | 1;
+ high = ((u64)__pa(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
@@ -197,6 +217,8 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
+ unsigned long mem;
+
if (!kvm_para_available())
return;
@@ -209,8 +231,18 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
- if (kvm_register_clock("boot clock"))
+ mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
+ PAGE_SIZE);
+ if (!mem)
+ return;
+ hv_clock = __va(mem);
+
+ if (kvm_register_clock("boot clock")) {
+ hv_clock = NULL;
+ memblock_free(mem,
+ sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
return;
+ }
pv_time_ops.sched_clock = kvm_clock_read;
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock;
@@ -233,3 +265,37 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}
+
+int __init kvm_setup_vsyscall_timeinfo(void)
+{
+#ifdef CONFIG_X86_64
+ int cpu;
+ int ret;
+ u8 flags;
+ struct pvclock_vcpu_time_info *vcpu_time;
+ unsigned int size;
+
+ size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ vcpu_time = &hv_clock[cpu].pvti;
+ flags = pvclock_read_flags(vcpu_time);
+
+ if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
+ preempt_enable();
+ return 1;
+ }
+
+ if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
+ preempt_enable();
+ return ret;
+ }
+
+ preempt_enable();
+
+ kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
+#endif
+ return 0;
+}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 42eb3300dfc..85c39590c1a 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -17,23 +17,13 @@
#include <linux/kernel.h>
#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/bootmem.h>
+#include <asm/fixmap.h>
#include <asm/pvclock.h>
-/*
- * These are perodically updated
- * xen: magic shared_info page
- * kvm: gpa registered via msr
- * and then copied here.
- */
-struct pvclock_shadow_time {
- u64 tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_timestamp; /* Time, in nanosecs, since boot. */
- u32 tsc_to_nsec_mul;
- int tsc_shift;
- u32 version;
- u8 flags;
-};
-
static u8 valid_flags __read_mostly = 0;
void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
valid_flags = flags;
}
-static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
-{
- u64 delta = native_read_tsc() - shadow->tsc_timestamp;
- return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
- shadow->tsc_shift);
-}
-
-/*
- * Reads a consistent set of time-base values from hypervisor,
- * into a shadow data area.
- */
-static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
- struct pvclock_vcpu_time_info *src)
-{
- do {
- dst->version = src->version;
- rmb(); /* fetch version before data */
- dst->tsc_timestamp = src->tsc_timestamp;
- dst->system_timestamp = src->system_time;
- dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
- dst->tsc_shift = src->tsc_shift;
- dst->flags = src->flags;
- rmb(); /* test version after fetching data */
- } while ((src->version & 1) || (dst->version != src->version));
-
- return dst->version;
-}
-
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
atomic64_set(&last_value, 0);
}
+u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
+{
+ unsigned version;
+ cycle_t ret;
+ u8 flags;
+
+ do {
+ version = __pvclock_read_cycles(src, &ret, &flags);
+ } while ((src->version & 1) || version != src->version);
+
+ return flags & valid_flags;
+}
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
- struct pvclock_shadow_time shadow;
unsigned version;
- cycle_t ret, offset;
+ cycle_t ret;
u64 last;
+ u8 flags;
do {
- version = pvclock_get_time_values(&shadow, src);
- barrier();
- offset = pvclock_get_nsec_offset(&shadow);
- ret = shadow.system_timestamp + offset;
- barrier();
- } while (version != src->version);
+ version = __pvclock_read_cycles(src, &ret, &flags);
+ } while ((src->version & 1) || version != src->version);
if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
- (shadow.flags & PVCLOCK_TSC_STABLE_BIT))
+ (flags & PVCLOCK_TSC_STABLE_BIT))
return ret;
/*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
+
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+ if (!pvclock_vdso_info) {
+ BUG();
+ return NULL;
+ }
+
+ return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+ return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
+#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+ void *v)
+{
+ struct task_migration_notifier *mn = v;
+ struct pvclock_vsyscall_time_info *pvti;
+
+ pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+ /* this is NULL when pvclock vsyscall is not initialized */
+ if (unlikely(pvti == NULL))
+ return NOTIFY_DONE;
+
+ pvti->migrate_count++;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+ .notifier_call = pvclock_task_migrate,
+};
+
+/*
+ * Initialize the generic pvclock vsyscall state. This will allocate
+ * a/some page(s) for the per-vcpu pvclock information, set up a
+ * fixmap mapping for the page(s)
+ */
+
+int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
+ int size)
+{
+ int idx;
+
+ WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+
+ pvclock_vdso_info = i;
+
+ for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
+ __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
+ __pa_symbol(i) + (idx*PAGE_SIZE),
+ PAGE_KERNEL_VVAR);
+ }
+
+
+ register_task_migration_notifier(&pvclock_migrate);
+
+ return 0;
+}
+#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca45696f30f..23ddd558fbd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid)
}
#endif
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-struct boot_params __initdata boot_params;
-#else
struct boot_params boot_params;
-#endif
/*
* Machine setup..
@@ -956,6 +952,10 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
+#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
+ acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
+#endif
+
reserve_crashkernel();
vsmp_init();
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f84fe00fad4..d4f460f962e 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -103,71 +103,13 @@ void __init tboot_probe(void)
pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
}
-static pgd_t *tboot_pg_dir;
-static struct mm_struct tboot_mm = {
- .mm_rb = RB_ROOT,
- .pgd = swapper_pg_dir,
- .mm_users = ATOMIC_INIT(2),
- .mm_count = ATOMIC_INIT(1),
- .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
- .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
- .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
-};
-
static inline void switch_to_tboot_pt(void)
{
- write_cr3(virt_to_phys(tboot_pg_dir));
-}
-
-static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
- pgprot_t prot)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = pgd_offset(&tboot_mm, vaddr);
- pud = pud_alloc(&tboot_mm, pgd, vaddr);
- if (!pud)
- return -1;
- pmd = pmd_alloc(&tboot_mm, pud, vaddr);
- if (!pmd)
- return -1;
- pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
- if (!pte)
- return -1;
- set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
- pte_unmap(pte);
- return 0;
-}
-
-static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
- unsigned long nr)
-{
- /* Reuse the original kernel mapping */
- tboot_pg_dir = pgd_alloc(&tboot_mm);
- if (!tboot_pg_dir)
- return -1;
-
- for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
- if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
- return -1;
- }
-
- return 0;
-}
-
-static void tboot_create_trampoline(void)
-{
- u32 map_base, map_size;
-
- /* Create identity map for tboot shutdown code. */
- map_base = PFN_DOWN(tboot->tboot_base);
- map_size = PFN_UP(tboot->tboot_size);
- if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
- panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
- map_base, map_size);
+#ifdef CONFIG_X86_32
+ load_cr3(initial_page_table);
+#else
+ write_cr3(real_mode_header->trampoline_pgd);
+#endif
}
#ifdef CONFIG_ACPI_SLEEP
@@ -225,14 +167,6 @@ void tboot_shutdown(u32 shutdown_type)
if (!tboot_enabled())
return;
- /*
- * if we're being called before the 1:1 mapping is set up then just
- * return and let the normal shutdown happen; this should only be
- * due to very early panic()
- */
- if (!tboot_pg_dir)
- return;
-
/* if this is S3 then set regions to MAC */
if (shutdown_type == TB_SHUTDOWN_S3)
if (tboot_setup_sleep())
@@ -343,8 +277,6 @@ static __init int tboot_late_init(void)
if (!tboot_enabled())
return 0;
- tboot_create_trampoline();
-
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5c9687b1bde..1dfe69cc78a 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
- split_huge_page_pmd(mm, pmd);
+ split_huge_page_pmd_mm(mm, 0xA0000, pmd);
if (pmd_none_or_clear_bad(pmd))
goto out;
pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);