From 0327318445d55808991a63137cfb698a90ab6adf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 18 Apr 2008 17:49:15 -0700 Subject: x86_64: simplify the memtest parameter setting use CONFIG_MEMTEST only. if it is set, will have memtest=0 (disabled) need to have memtest=4 in command line to test more patterns. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2..a1a9021146b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -419,35 +419,19 @@ config PARAVIRT endif -config MEMTEST_BOOTPARAM - bool "Memtest boot parameter" +config MEMTEST + bool "Memtest" depends on X86_64 default y help This option adds a kernel parameter 'memtest', which allows memtest - to be disabled at boot. If this option is selected, memtest - functionality can be disabled with memtest=0 on the kernel - command line. The purpose of this option is to allow a single - kernel image to be distributed with memtest built in, but not - necessarily enabled. - + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer Y. -config MEMTEST_BOOTPARAM_VALUE - int "Memtest boot parameter default value (0-4)" - depends on MEMTEST_BOOTPARAM - range 0 4 - default 0 - help - This option sets the default value for the kernel parameter - 'memtest', which allows memtest to be disabled at boot. If this - option is set to 0 (zero), the memtest kernel parameter will - default to 0, disabling memtest at bootup. If this option is - set to 4, the memtest kernel parameter will default to 4, - enabling memtest at bootup, and use that as pattern number. - - If you are unsure how to answer this question, answer 0. - config ACPI_SRAT def_bool y depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) -- cgit v1.2.3-70-g09d2 From c3ed64295f0fed9131b42122234b1ce4a4a266cf Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 May 2008 10:44:39 -0700 Subject: x86: change maximum NR_CPUS to 4096 and MAX_NUMNODES to 512 * Change the range of NR_CPUS from 2-255 to 2-4096 and change the range of MAX_NUMNODES (NODES_SHIFT) from 1-32768 to 1-512. * Alter comment about how much each increment of NR_CPUS consumes. (This was found by configuring for 256 cpus and then 512 cpus and dividing the difference by 256.) Signed-off-by: Mike Travis Cc: Andrew Morton Cc: Dave Jones Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2..bbafeac8911 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -565,18 +565,18 @@ config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) config NR_CPUS - int "Maximum number of CPUs (2-255)" - range 2 255 + int "Maximum number of CPUs (2-4096)" + range 2 4096 depends on SMP default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 default "8" help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 255 and the + kernel will support. The maximum supported value is 4096 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds - approximately eight kilobytes to the kernel image. + approximately one kilobyte to the kernel image. config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -968,8 +968,8 @@ config NUMA_EMU number of nodes. This is only useful for debugging. config NODES_SHIFT - int "Max num nodes shift(1-15)" - range 1 15 if X86_64 + int "Max num nodes shift(1-9)" + range 1 9 if X86_64 default "6" if X86_64 default "4" if X86_NUMAQ default "3" -- cgit v1.2.3-70-g09d2 From c9fea78dc9775981bed2da379568c8a8fddd1fb6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 May 2008 13:53:39 +0200 Subject: x86: make NUMAQ depend on PCI NUMAQ depends on the existence of PCI hardware, and requiring PCI makes this subarch simpler and avoids build failures if PCI is disabled. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bbafeac8911..029372813ae 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -270,7 +270,7 @@ config X86_VOYAGER config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 + depends on SMP && X86_32 && PCI select NUMA help This option is used for getting Linux to run on a (IBM/Sequent) NUMA -- cgit v1.2.3-70-g09d2 From 1ac97018169c5a13feaa90d9671f2d6ba2d9e86e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 May 2008 14:10:14 +0200 Subject: x86: untangle pci dependencies make PCI-less subarches not build with PCI - instead of complicating the PCI dependencies. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 029372813ae..78700f5faf6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -258,7 +258,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && (SMP || BROKEN) + depends on X86_32 && (SMP || BROKEN) && !PCI help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. @@ -300,7 +300,7 @@ config X86_BIGSMP config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 + depends on X86_32 && !PCI help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -344,7 +344,7 @@ config X86_RDC321X config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT - depends on X86_64 + depends on X86_64 && !PCI help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option @@ -1477,8 +1477,7 @@ endmenu menu "Bus options (PCI etc.)" config PCI - bool "PCI support" if !X86_VISWS && !X86_VSMP - depends on !X86_VOYAGER + bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) help -- cgit v1.2.3-70-g09d2 From 73531905ed53576d9e8707659a761e7046a60497 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 25 May 2008 23:03:18 +0200 Subject: Kconfig: introduce ARCH_DEFCONFIG to DEFCONFIG_LIST init/Kconfig contains a list of configs that are searched for if 'make *config' are used with no .config present. Extend this list to look at the config identified by ARCH_DEFCONFIG. With this change we now try the defconfig targets last. This fixes a regression reported by: Linus Torvalds Signed-off-by: Sam Ravnborg Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/Kconfig | 13 +++---------- init/Kconfig | 1 + 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef2..dcbec34154c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -26,17 +26,10 @@ config X86 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER -config DEFCONFIG_LIST +config ARCH_DEFCONFIG string - depends on X86_32 - option defconfig_list - default "arch/x86/configs/i386_defconfig" - -config DEFCONFIG_LIST - string - depends on X86_64 - option defconfig_list - default "arch/x86/configs/x86_64_defconfig" + default "arch/x86/configs/i386_defconfig" if X86_32 + default "arch/x86/configs/x86_64_defconfig" if X86_64 config GENERIC_LOCKBREAK diff --git a/init/Kconfig b/init/Kconfig index 6135d07f31e..6199d112090 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -13,6 +13,7 @@ config DEFCONFIG_LIST default "/lib/modules/$UNAME_RELEASE/.config" default "/etc/kernel-config" default "/boot/config-$UNAME_RELEASE" + default "$ARCH_DEFCONFIG" default "arch/$ARCH/defconfig" menu "General setup" -- cgit v1.2.3-70-g09d2 From 2bdd1b031b200d55c2512c8d7e0e9bdcf85d011f Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 5 Jun 2008 14:14:41 -0700 Subject: PCI/x86: fix up PCI stuff so that PCI_GOANY supports OLPC Previously, one would have to specifically choose CONFIG_OLPC and CONFIG_PCI_GOOLPC in order to enable PCI_OLPC. That doesn't really work for distro kernels, so this patch allows one to choose CONFIG_OLPC and CONFIG_PCI_GOANY in order to build in OLPC support in a generic kernel (as requested by Robert Millan). This also moves GOOLPC before GOANY in the menuconfig list. Finally, make pci_access_init return early if we detect OLPC hardware. There's no need to continue probing stuff, and pci_pcbios_init specifically trashes our settings (we didn't run into that before because PCI_GOANY wasn't supported). Signed-off-by: Andres Salomon Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- arch/x86/Kconfig | 11 +++++------ arch/x86/pci/init.c | 3 ++- arch/x86/pci/olpc.c | 5 +++-- arch/x86/pci/pci.h | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcbec34154c..52e18e6d2ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1508,13 +1508,13 @@ config PCI_GOMMCONFIG config PCI_GODIRECT bool "Direct" -config PCI_GOANY - bool "Any" - config PCI_GOOLPC bool "OLPC" depends on OLPC +config PCI_GOANY + bool "Any" + endchoice config PCI_BIOS @@ -1531,9 +1531,8 @@ config PCI_MMCONFIG depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) config PCI_OLPC - bool - depends on PCI && PCI_GOOLPC - default y + def_bool y + depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY) config PCI_DOMAINS def_bool y diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index e70b9c57b88..b821f4462d9 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -15,7 +15,8 @@ static __init int pci_access_init(void) pci_mmcfg_early_init(); #ifdef CONFIG_PCI_OLPC - pci_olpc_init(); + if (!pci_olpc_init()) + return 0; /* skip additional checks if it's an XO */ #endif #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index 5e7636558c0..e11e9e803d5 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -302,12 +302,13 @@ static struct pci_raw_ops pci_olpc_conf = { .write = pci_olpc_write, }; -void __init pci_olpc_init(void) +int __init pci_olpc_init(void) { if (!machine_is_olpc() || olpc_has_vsa()) - return; + return -ENODEV; printk(KERN_INFO "PCI: Using configuration type OLPC\n"); raw_pci_ops = &pci_olpc_conf; is_lx = is_geode_lx(); + return 0; } diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index f3972b12c60..720c4c55453 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -101,7 +101,7 @@ extern struct pci_raw_ops pci_direct_conf1; extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_olpc_init(void); +extern int pci_olpc_init(void); /* pci-mmconfig.c */ -- cgit v1.2.3-70-g09d2 From 7af192c954017499ec163bc9dbaaee2e593d7ef2 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 3 Jun 2008 16:17:29 +0200 Subject: x86: Add structs and functions for paravirt clocksource This patch adds structs for the paravirt clocksource ABI used by both xen and kvm (pvclock-abi.h). It also adds some helper functions to read system time and wall clock time from a paravirtual clocksource (pvclock.[ch]). They are based on the xen code. They are enabled using CONFIG_PARAVIRT_CLOCK. Subsequent patches of this series will put the code in use. Signed-off-by: Gerd Hoffmann Acked-by: Jeremy Fitzhardinge Signed-off-by: Avi Kivity --- arch/x86/Kconfig | 4 ++ arch/x86/kernel/Makefile | 1 + arch/x86/kernel/pvclock.c | 141 ++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/pvclock-abi.h | 42 +++++++++++++ include/asm-x86/pvclock.h | 13 ++++ 5 files changed, 201 insertions(+) create mode 100644 arch/x86/kernel/pvclock.c create mode 100644 include/asm-x86/pvclock-abi.h create mode 100644 include/asm-x86/pvclock.h (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 52e18e6d2ba..f94bca6ff47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -410,6 +410,10 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_CLOCK + bool + default n + endif config MEMTEST_BOOTPARAM diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b472..77807d4769c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c new file mode 100644 index 00000000000..05fbe9a0325 --- /dev/null +++ b/arch/x86/kernel/pvclock.c @@ -0,0 +1,141 @@ +/* paravirtual clock -- common code used by kvm/xen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include +#include +#include + +/* + * These are perodically updated + * xen: magic shared_info page + * kvm: gpa registered via msr + * and then copied here. + */ +struct pvclock_shadow_time { + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_timestamp; /* Time, in nanosecs, since boot. */ + u32 tsc_to_nsec_mul; + int tsc_shift; + u32 version; +}; + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#endif + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#elif __x86_64__ + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); +#else +#error implement me! +#endif + + return product; +} + +static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) +{ + u64 delta = native_read_tsc() - shadow->tsc_timestamp; + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); +} + +/* + * Reads a consistent set of time-base values from hypervisor, + * into a shadow data area. + */ +static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, + struct pvclock_vcpu_time_info *src) +{ + do { + dst->version = src->version; + rmb(); /* fetch version before data */ + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; + rmb(); /* test version after fetching data */ + } while ((src->version & 1) || (dst->version != src->version)); + + return dst->version; +} + +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +{ + struct pvclock_shadow_time shadow; + unsigned version; + cycle_t ret, offset; + + do { + version = pvclock_get_time_values(&shadow, src); + barrier(); + offset = pvclock_get_nsec_offset(&shadow); + ret = shadow.system_timestamp + offset; + barrier(); + } while (version != src->version); + + return ret; +} + +void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, + struct pvclock_vcpu_time_info *vcpu_time, + struct timespec *ts) +{ + u32 version; + u64 delta; + struct timespec now; + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; + + now.tv_nsec = do_div(delta, NSEC_PER_SEC); + now.tv_sec = delta; + + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); +} diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h new file mode 100644 index 00000000000..6857f840b24 --- /dev/null +++ b/include/asm-x86/pvclock-abi.h @@ -0,0 +1,42 @@ +#ifndef _ASM_X86_PVCLOCK_ABI_H_ +#define _ASM_X86_PVCLOCK_ABI_H_ +#ifndef __ASSEMBLY__ + +/* + * These structs MUST NOT be changed. + * They are the ABI between hypervisor and guest OS. + * Both Xen and KVM are using this. + * + * pvclock_vcpu_time_info holds the system time and the tsc timestamp + * of the last update. So the guest can use the tsc delta to get a + * more precise system time. There is one per virtual cpu. + * + * pvclock_wall_clock references the point in time when the system + * time was zero (usually boot time), thus the guest calculates the + * current wall clock by adding the system time. + * + * Protocol for the "version" fields is: hypervisor raises it (making + * it uneven) before it starts updating the fields and raises it again + * (making it even) when it is done. Thus the guest can make sure the + * time values it got are consistent by checking the version before + * and after reading them. + */ + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PVCLOCK_ABI_H_ */ diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h new file mode 100644 index 00000000000..85b1bba8e0a --- /dev/null +++ b/include/asm-x86/pvclock.h @@ -0,0 +1,13 @@ +#ifndef _ASM_X86_PVCLOCK_H_ +#define _ASM_X86_PVCLOCK_H_ + +#include +#include + +/* some helper functions for xen and kvm pv clock sources */ +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +void pvclock_read_wallclock(struct pvclock_wall_clock *wall, + struct pvclock_vcpu_time_info *vcpu, + struct timespec *ts); + +#endif /* _ASM_X86_PVCLOCK_H_ */ -- cgit v1.2.3-70-g09d2 From f6e16d5ad463d15f285666f588cfe49495c692d9 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 3 Jun 2008 16:17:32 +0200 Subject: x86: KVM guest: Use the paravirt clocksource structs and functions This patch updates the kvm host code to use the pvclock structs and functions, thereby making it compatible with Xen. The patch also fixes an initialization bug: on SMP systems the per-cpu has two different locations early at boot and after CPU bringup. kvmclock must take that in account when registering the physical address within the host. Signed-off-by: Gerd Hoffmann Signed-off-by: Avi Kivity --- arch/x86/Kconfig | 1 + arch/x86/kernel/kvmclock.c | 89 +++++++++++++++++----------------------------- 2 files changed, 34 insertions(+), 56 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f94bca6ff47..e0edaaa6920 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -383,6 +383,7 @@ config VMI config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT + select PARAVIRT_CLOCK depends on !(X86_VISWS || X86_VOYAGER) help Turning on this option will allow you to run a paravirtualized clock diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 08a30986d47..87edf1ceb1d 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg) early_param("no-kvmclock", parse_no_kvmclock); /* The hypervisor will put information about time periodically here */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); -#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field +static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); +static struct pvclock_wall_clock wall_clock; -static inline u64 kvm_get_delta(u64 last_tsc) -{ - int cpu = smp_processor_id(); - u64 delta = native_read_tsc() - last_tsc; - return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; -} - -static struct kvm_wall_clock wall_clock; -static cycle_t kvm_clock_read(void); /* * The wallclock is the time of day when we booted. Since then, some time may * have elapsed since the hypervisor wrote the data. So we try to account for @@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void); */ static unsigned long kvm_get_wallclock(void) { - u32 wc_sec, wc_nsec; - u64 delta; + struct pvclock_vcpu_time_info *vcpu_time; struct timespec ts; - int version, nsec; int low, high; low = (int)__pa(&wall_clock); high = ((u64)__pa(&wall_clock) >> 32); + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); - delta = kvm_clock_read(); + vcpu_time = &get_cpu_var(hv_clock); + pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + put_cpu_var(hv_clock); - native_write_msr(MSR_KVM_WALL_CLOCK, low, high); - do { - version = wall_clock.wc_version; - rmb(); - wc_sec = wall_clock.wc_sec; - wc_nsec = wall_clock.wc_nsec; - rmb(); - } while ((wall_clock.wc_version != version) || (version & 1)); - - delta = kvm_clock_read() - delta; - delta += wc_nsec; - nsec = do_div(delta, NSEC_PER_SEC); - set_normalized_timespec(&ts, wc_sec + delta, nsec); - /* - * Of all mechanisms of time adjustment I've tested, this one - * was the champion! - */ - return ts.tv_sec + 1; + return ts.tv_sec; } static int kvm_set_wallclock(unsigned long now) { - return 0; + return -1; } -/* - * This is our read_clock function. The host puts an tsc timestamp each time - * it updates a new time. Without the tsc adjustment, we can have a situation - * in which a vcpu starts to run earlier (smaller system_time), but probes - * time later (compared to another vcpu), leading to backwards time - */ static cycle_t kvm_clock_read(void) { - u64 last_tsc, now; - int cpu; + struct pvclock_vcpu_time_info *src; + cycle_t ret; - preempt_disable(); - cpu = smp_processor_id(); - - last_tsc = get_clock(cpu, tsc_timestamp); - now = get_clock(cpu, system_time); - - now += kvm_get_delta(last_tsc); - preempt_enable(); - - return now; + src = &get_cpu_var(hv_clock); + ret = pvclock_clocksource_read(src); + put_cpu_var(hv_clock); + return ret; } + static struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_read, @@ -123,13 +88,14 @@ static struct clocksource kvm_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static int kvm_register_clock(void) +static int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high; low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); - + printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", + cpu, high, low, txt); return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); } @@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void) * Now that the first cpu already had this clocksource initialized, * we shouldn't fail. */ - WARN_ON(kvm_register_clock()); + WARN_ON(kvm_register_clock("secondary cpu clock")); /* ok, done with our trickery, call native */ setup_secondary_APIC_clock(); } #endif +#ifdef CONFIG_SMP +void __init kvm_smp_prepare_boot_cpu(void) +{ + WARN_ON(kvm_register_clock("primary cpu clock")); + native_smp_prepare_boot_cpu(); +} +#endif + /* * After the clock is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory @@ -174,13 +148,16 @@ void __init kvmclock_init(void) return; if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { - if (kvm_register_clock()) + if (kvm_register_clock("boot clock")) return; pv_time_ops.get_wallclock = kvm_get_wallclock; pv_time_ops.set_wallclock = kvm_set_wallclock; pv_time_ops.sched_clock = kvm_clock_read; #ifdef CONFIG_X86_LOCAL_APIC pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; +#endif +#ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; #endif machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC -- cgit v1.2.3-70-g09d2 From efac41894df57d32b483ac622d03541b5b2692c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 1 Jul 2008 08:56:32 +0200 Subject: x86: fix NODES_SHIFT Kconfig range commit 4323838215184f5a2f081e0d17b8d60731b03164 x86: change size of node ids from u8 to s16 set the range for NODES_SHIFT to 1..15. The possible range is 1..9 Fixes Bugzilla #10726 Reported-by: Dave Jones Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/Kconfig') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 52e18e6d2ba..8a07f417f5f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -961,8 +961,8 @@ config NUMA_EMU number of nodes. This is only useful for debugging. config NODES_SHIFT - int "Max num nodes shift(1-15)" - range 1 15 if X86_64 + int "Max num nodes shift(1-9)" + range 1 9 if X86_64 default "6" if X86_64 default "4" if X86_NUMAQ default "3" -- cgit v1.2.3-70-g09d2