From 6d612b0f943289856c6e8186c564cda922cd040e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Jan 2009 12:52:23 +0100 Subject: locking, hpet: annotate false positive warning Alexander Beregalov reported that this warning is caused by the HPET code: > hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 > hpet0: 3 comparators, 64-bit 14.318180 MHz counter > ODEBUG: object is on stack, but not annotated > ------------[ cut here ]------------ > WARNING: at lib/debugobjects.c:251 __debug_object_init+0x2a4/0x352() > Bisected down to 26afe5f2fbf06ea0765aaa316640c4dd472310c0 > (x86: HPET_MSI Initialise per-cpu HPET timers) The commit is fine - but the on-stack workqueue entry needs annotation. Reported-and-bisected-by: Alexander Beregalov Signed-off-by: Peter Zijlstra Tested-by: Alexander Beregalov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad9069..bb2e0f0975a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,7 +628,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); -- cgit v1.2.3-70-g09d2 From 42bb8cc5e81028e217105299001070d57eb84ad7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Jan 2009 12:17:40 -0800 Subject: x86: hpet: allow force enable on ICH10 HPET Intel "Smackover" x58 BIOS don't have HPET enabled in the BIOS, so allow to force enable it at least. The register layout is the same as in other recent ICHs, so all the code can be reused. Using numerical PCI-ID because it's unlikely the PCI-ID will be used anywhere else. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 309949e9e1c..697d1b78cfb 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -172,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, ich_force_enable_hpet); - +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */ + ich_force_enable_hpet); static struct pci_dev *cached_dev; -- cgit v1.2.3-70-g09d2 From 4a922a969cb0190ce4580d4b064e2ac35f3ac9bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Jan 2009 16:11:00 +0100 Subject: x86, cpufreq: remove leftover copymask_copy() Impact: fix potential boot crash on MAXSMP Remove code left over by: 50c668d: Revert "cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read That cmd.cpumask is not allocated anymore. No impact on default !MAXSMP kernels. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 6f11e029e8c..8f3c95c7e61 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -235,8 +235,6 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - cpumask_copy(cmd.mask, mask); - drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); -- cgit v1.2.3-70-g09d2 From c8399943bdb70fef78798b97f975506ecc99e039 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Jan 2009 23:01:15 +0100 Subject: x86, generic: mark complex bitops.h inlines as __always_inline Impact: reduce kernel image size Hugh Dickins noticed that older gcc versions when the kernel is built for code size didn't inline some of the bitops. Mark all complex x86 bitops that have more than a single asm statement or two as always inline to avoid this problem. Probably should be done for other architectures too. Ingo then found a better fix that only requires a single line change, but it unfortunately only works on gcc 4.3. On older gccs the original patch still makes a ~0.3% defconfig difference with CONFIG_OPTIMIZE_INLINING=y. With gcc 4.1 and a defconfig like build: 6116998 1138540 883788 8139326 7c323e vmlinux-oi-with-patch 6137043 1138540 883788 8159371 7c808b vmlinux-optimize-inlining ~20k / 0.3% difference. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bitops.h | 14 ++++++++++---- include/asm-generic/bitops/__ffs.h | 2 +- include/asm-generic/bitops/__fls.h | 2 +- include/asm-generic/bitops/fls.h | 2 +- include/asm-generic/bitops/fls64.h | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index e02a359d2aa..02b47a603fc 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -3,6 +3,9 @@ /* * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. */ #ifndef _LINUX_BITOPS_H @@ -53,7 +56,8 @@ * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *addr) +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) * * This is the same as test_and_set_bit on x86. */ -static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); } @@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) return oldbit; } -static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h index 9a3274aecf8..937d7c43557 100644 --- a/include/asm-generic/bitops/__ffs.h +++ b/include/asm-generic/bitops/__ffs.h @@ -9,7 +9,7 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned long __ffs(unsigned long word) { int num = 0; diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index be24465403d..a60a7ccb678 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -9,7 +9,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static inline unsigned long __fls(unsigned long word) +static __always_inline unsigned long __fls(unsigned long word) { int num = BITS_PER_LONG - 1; diff --git a/include/asm-generic/bitops/fls.h b/include/asm-generic/bitops/fls.h index 850859bc506..0576d1f42f4 100644 --- a/include/asm-generic/bitops/fls.h +++ b/include/asm-generic/bitops/fls.h @@ -9,7 +9,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline int fls(int x) +static __always_inline int fls(int x) { int r = 32; diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h index 86d403f8b25..b097cf8444e 100644 --- a/include/asm-generic/bitops/fls64.h +++ b/include/asm-generic/bitops/fls64.h @@ -15,7 +15,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static inline int fls64(__u64 x) +static __always_inline int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -23,7 +23,7 @@ static inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static inline int fls64(__u64 x) +static __always_inline int fls64(__u64 x) { if (x == 0) return 0; -- cgit v1.2.3-70-g09d2 From 5cca0cf15a94417f49625ce52e23589eed0a1675 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 9 Jan 2009 14:35:20 -0800 Subject: x86, pat: fix reserve_memtype() for legacy 1MB range Thierry Vignaud reported: > http://bugzilla.kernel.org/show_bug.cgi?id=12372 > > On P4 with an SiS motherboard (video card is a SiS 651) > X server fails to start with error: > xf86MapVidMem: Could not mmap framebuffer (0x00000000,0x2000) (Invalid > argument) Here X is trying to map first 8KB of memory using /dev/mem. Existing code treats first 0-4KB of memory as non-RAM and 4KB-8KB as RAM. Recent code changes don't allow to map memory with different attributes at the same time. Fix this by treating the first 1MB legacy region as special and always track the attribute requests with in this region using linear linked list (and don't bother if the range is RAM or non-RAM or mixed) Reported-and-tested-by: Thierry Vignaud Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 85cbd3cd372..070ee4a3b22 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -333,11 +333,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, req_type & _PAGE_CACHE_MASK); } - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, new_type); - else if (is_range_ram < 0) - return -EINVAL; + /* + * For legacy reasons, some parts of the physical address range in the + * legacy 1MB region is treated as non-RAM (even when listed as RAM in + * the e820 tables). So we will track the memory attributes of this + * legacy 1MB region using the linear memtype_list always. + */ + if (end >= ISA_END_ADDRESS) { + is_range_ram = pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; + } new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; + /* + * For legacy reasons, some parts of the physical address range in the + * legacy 1MB region is treated as non-RAM (even when listed as RAM in + * the e820 tables). So we will track the memory attributes of this + * legacy 1MB region using the linear memtype_list always. + */ + if (end >= ISA_END_ADDRESS) { + is_range_ram = pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; + } spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { -- cgit v1.2.3-70-g09d2 From 2ea038917bbdd51a7ae4a898c6a04641324dd033 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 14 Jan 2009 21:38:20 +0100 Subject: Revert "kbuild: strip generated symbols from *.ko" This reverts commit ad7a953c522ceb496611d127e51e278bfe0ff483. And commit: ("allow stripping of generated symbols under CONFIG_KALLSYMS_ALL") 9bb482476c6c9d1ae033306440c51ceac93ea80c These stripping patches has caused a set of issues: 1) People have reported compatibility issues with binutils due to lack of support for `--strip-unneeded-symbols' with objcopy 2.15.92.0.2 Reported by: Wenji 2) ccache and distcc no longer works as expeced Reported by: Ted, Roland, + others 3) The installed modules increased a lot in size Reported by: Ted, Davej + others Reported-by: Wenji Huang Reported-by: "Theodore Ts'o" Reported-by: Dave Jones Reported-by: Roland McGrath Signed-off-by: Sam Ravnborg --- Makefile | 59 ++++------- arch/x86/scripts/strip-symbols | 1 - init/Kconfig | 7 -- kernel/kallsyms.c | 16 +-- scripts/Makefile.build | 55 ++++------- scripts/Makefile.modinst | 3 +- scripts/genksyms/genksyms.c | 21 ++-- scripts/genksyms/keywords.c_shipped | 189 ++++++++++++++++++------------------ scripts/genksyms/keywords.gperf | 2 - scripts/kallsyms.c | 21 ++-- scripts/mksysmap | 7 +- scripts/strip-symbols | 22 ----- 12 files changed, 166 insertions(+), 237 deletions(-) delete mode 100644 arch/x86/scripts/strip-symbols delete mode 100644 scripts/strip-symbols (limited to 'arch/x86') diff --git a/Makefile b/Makefile index c06e250eca1..c2c4bbeef59 100644 --- a/Makefile +++ b/Makefile @@ -606,25 +606,20 @@ export INSTALL_PATH ?= /boot MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) export MODLIB -strip-symbols := $(srctree)/scripts/strip-symbols \ - $(wildcard $(srctree)/arch/$(ARCH)/scripts/strip-symbols) - # -# INSTALL_MOD_STRIP, if defined, will cause modules to be stripped while -# they get installed. If INSTALL_MOD_STRIP is '1', then the default -# options (see below) will be used. Otherwise, INSTALL_MOD_STRIP will -# be used as the option(s) to the objcopy command. +# INSTALL_MOD_STRIP, if defined, will cause modules to be +# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then +# the default option --strip-debug will be used. Otherwise, +# INSTALL_MOD_STRIP will used as the options to the strip command. + ifdef INSTALL_MOD_STRIP ifeq ($(INSTALL_MOD_STRIP),1) -mod_strip_cmd = $(OBJCOPY) --strip-debug -ifeq ($(CONFIG_KALLSYMS_ALL),$(CONFIG_KALLSYMS_STRIP_GENERATED)) -mod_strip_cmd += --wildcard $(addprefix --strip-symbols ,$(strip-symbols)) -endif +mod_strip_cmd = $(STRIP) --strip-debug else -mod_strip_cmd = $(OBJCOPY) $(INSTALL_MOD_STRIP) +mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP) endif # INSTALL_MOD_STRIP=1 else -mod_strip_cmd = false +mod_strip_cmd = true endif # INSTALL_MOD_STRIP export mod_strip_cmd @@ -754,7 +749,6 @@ last_kallsyms := 2 endif kallsyms.o := .tmp_kallsyms$(last_kallsyms).o -kallsyms.h := $(wildcard include/config/kallsyms/*.h) $(wildcard include/config/kallsyms/*/*.h) define verify_kallsyms $(Q)$(if $($(quiet)cmd_sysmap), \ @@ -779,41 +773,24 @@ endef # Generate .S file with all kernel symbols quiet_cmd_kallsyms = KSYM $@ - cmd_kallsyms = { test $* -eq 0 || $(NM) -n $<; } \ - | $(KALLSYMS) $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) >$@ - -quiet_cmd_kstrip = STRIP $@ - cmd_kstrip = $(OBJCOPY) --wildcard $(addprefix --strip$(if $(CONFIG_RELOCATABLE),-unneeded)-symbols ,$(filter %/scripts/strip-symbols,$^)) $< $@ + cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \ + $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@ -$(foreach n,0 1 2 3,.tmp_kallsyms$(n).o): KBUILD_AFLAGS += -Wa,--strip-local-absolute -$(foreach n,0 1 2 3,.tmp_kallsyms$(n).o): %.o: %.S scripts FORCE +.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE $(call if_changed_dep,as_o_S) -ifeq ($(CONFIG_KALLSYMS_STRIP_GENERATED),y) -strip-ext := .stripped -endif - -.tmp_kallsyms%.S: .tmp_vmlinux%$(strip-ext) $(KALLSYMS) $(kallsyms.h) +.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS) $(call cmd,kallsyms) -# make -jN seems to have problems with intermediate files, see bug #3330. -.SECONDARY: $(foreach n,1 2 3,.tmp_vmlinux$(n).stripped) -.tmp_vmlinux%.stripped: .tmp_vmlinux% $(strip-symbols) $(kallsyms.h) - $(call cmd,kstrip) - -ifneq ($(CONFIG_DEBUG_INFO),y) -.tmp_vmlinux%: LDFLAGS_vmlinux += -S -endif # .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version -.tmp_vmlinux%: $(vmlinux-lds) $(vmlinux-all) FORCE - $(if $(filter 1,$*),$(call if_changed_rule,ksym_ld),$(call if_changed,vmlinux__)) +.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE + $(call if_changed_rule,ksym_ld) -.tmp_vmlinux0$(strip-ext): - $(Q)echo "placeholder" >$@ +.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE + $(call if_changed,vmlinux__) -.tmp_vmlinux1: .tmp_kallsyms0.o -.tmp_vmlinux2: .tmp_kallsyms1.o -.tmp_vmlinux3: .tmp_kallsyms2.o +.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE + $(call if_changed,vmlinux__) # Needs to visit scripts/ before $(KALLSYMS) can be used. $(KALLSYMS): scripts ; diff --git a/arch/x86/scripts/strip-symbols b/arch/x86/scripts/strip-symbols deleted file mode 100644 index a2f1ccb827c..00000000000 --- a/arch/x86/scripts/strip-symbols +++ /dev/null @@ -1 +0,0 @@ -__cpu_vendor_dev_X86_VENDOR_* diff --git a/init/Kconfig b/init/Kconfig index a724a149bf3..0e9924743a1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -626,13 +626,6 @@ config KALLSYMS_ALL Say N. -config KALLSYMS_STRIP_GENERATED - bool "Strip machine generated symbols from kallsyms" - depends on KALLSYMS_ALL - default y - help - Say N if you want kallsyms to retain even machine generated symbols. - config KALLSYMS_EXTRA_PASS bool "Do an extra kallsyms pass" depends on KALLSYMS diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index e694afa0eb8..7b8b0f21a5b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -30,19 +30,20 @@ #define all_var 0 #endif -extern const unsigned long kallsyms_addresses[]; -extern const u8 kallsyms_names[]; +/* These will be re-linked against their real values during the second link stage */ +extern const unsigned long kallsyms_addresses[] __attribute__((weak)); +extern const u8 kallsyms_names[] __attribute__((weak)); /* tell the compiler that the count isn't in the small data section if the arch * has one (eg: FRV) */ extern const unsigned long kallsyms_num_syms - __attribute__((__section__(".rodata"))); +__attribute__((weak, section(".rodata"))); -extern const u8 kallsyms_token_table[]; -extern const u16 kallsyms_token_index[]; +extern const u8 kallsyms_token_table[] __attribute__((weak)); +extern const u16 kallsyms_token_index[] __attribute__((weak)); -extern const unsigned long kallsyms_markers[]; +extern const unsigned long kallsyms_markers[] __attribute__((weak)); static inline int is_kernel_inittext(unsigned long addr) { @@ -167,6 +168,9 @@ static unsigned long get_symbol_pos(unsigned long addr, unsigned long symbol_start = 0, symbol_end = 0; unsigned long i, low, high, mid; + /* This kernel should never had been booted. */ + BUG_ON(!kallsyms_addresses); + /* do a binary search on the sorted kallsyms_addresses array */ low = 0; high = kallsyms_num_syms; diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5d900307de3..c7de8b39fcf 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -151,16 +151,16 @@ cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(src)/%.c FORCE $(call if_changed_dep,cc_i_c) -cmd_genksyms = \ +cmd_gensymtypes = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ - $(GENKSYMS) -T $@ -A -a $(ARCH) \ + $(GENKSYMS) -T $@ -a $(ARCH) \ $(if $(KBUILD_PRESERVE),-p) \ $(if $(1),-r $(firstword $(wildcard $(@:.symtypes=.symref) /dev/null))) quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ cmd_cc_symtypes_c = \ set -e; \ - $(call cmd_genksyms, true) >/dev/null; \ + $(call cmd_gensymtypes, true) >/dev/null; \ test -s $@ || rm -f $@ $(obj)/%.symtypes : $(src)/%.c FORCE @@ -177,38 +177,28 @@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< else # When module versioning is enabled the following steps are executed: -# o compile a .tmp_.s from .c -# o if .tmp_.s doesn't contain a __ksymtab version, i.e. does -# not export symbols, we just assemble .tmp_.s to .o and +# o compile a .tmp_.o from .c +# o if .tmp_.o doesn't contain a __ksymtab version, i.e. does +# not export symbols, we just rename .tmp_.o to .o and # are done. # o otherwise, we calculate symbol versions using the good old # genksyms on the preprocessed source and postprocess them in a way -# that they are usable as assembly source -# o assemble .o from .tmp_.s forcing inclusion of directives -# defining the actual values of __crc_*, followed by objcopy-ing them -# to force these symbols to be local to permit stripping them later. -s_file = $(@D)/.tmp_$(@F:.o=.s) -v_file = $(@D)/.tmp_$(@F:.o=.v) -tmp_o_file = $(@D)/.tmp_$(@F) -no_g_c_flags = $(filter-out -g%,$(c_flags)) - -cmd_cc_o_c = $(CC) $(c_flags) -S -o $(s_file) $< +# that they are usable as a linker script +# o generate .o from .tmp_.o using the linker to +# replace the unresolved symbols __crc_exported_symbol with +# the actual value of the checksum generated by genksyms +cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< cmd_modversions = \ - if grep -q __ksymtab $(s_file); then \ - if $(call cmd_genksyms, $(KBUILD_SYMTYPES)) > $(v_file) \ - && $(CC) $(no_g_c_flags) -c -Wa,$(v_file) \ - -o $(tmp_o_file) $(s_file) \ - && $(OBJCOPY) -L '__crc_*' -L '___crc_*' -w \ - $(tmp_o_file) $@; \ - then \ - : ; \ - else \ - rm -f $@; exit 1; \ - fi; \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes, $(KBUILD_SYMTYPES)) \ + > $(@D)/.tmp_$(@F:.o=.ver); \ + \ + $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ + -T $(@D)/.tmp_$(@F:.o=.ver); \ + rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ else \ - rm -f $(v_file); \ - $(CC) $(no_g_c_flags) -c -o $@ $(s_file); \ + mv -f $(@D)/.tmp_$(@F) $@; \ fi; endif @@ -225,12 +215,7 @@ define rule_cc_o_c $(cmd_record_mcount) \ scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ $(dot-target).tmp; \ - if [ -r $(@D)/.tmp_$(@F:.o=.v) ]; then \ - echo >> $(dot-target).tmp; \ - echo '$@: $(GENKSYMS)' >> $(dot-target).tmp; \ - echo '$(GENKSYMS):: ;' >> $(dot-target).tmp; \ - fi; \ - rm -f $(depfile) $(@D)/.tmp_$(@F:.o=.?); \ + rm -f $(depfile); \ mv -f $(dot-target).tmp $(dot-target).cmd endef diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index a5122dce126..efa5d940e63 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -17,8 +17,7 @@ __modinst: $(modules) @: quiet_cmd_modules_install = INSTALL $@ - cmd_modules_install = mkdir -p $(2); \ - $(mod_strip_cmd) $@ $(2)/$(notdir $@) || cp $@ $(2) + cmd_modules_install = mkdir -p $(2); cp $@ $(2) ; $(mod_strip_cmd) $(2)/$(notdir $@) # Modules built outside the kernel source tree go into extra by default INSTALL_MOD_DIR ?= extra diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index f8bb4cabd62..3a8297b5184 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -43,7 +43,7 @@ int cur_line = 1; char *cur_filename; static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types, - flag_preserve, flag_warnings, flag_asm; + flag_preserve, flag_warnings; static const char *arch = ""; static const char *mod_prefix = ""; @@ -610,11 +610,8 @@ void export_symbol(const char *name) if (flag_dump_defs) fputs(">\n", debugfile); - /* Used as assembly source or a linker script. */ - printf(flag_asm - ? ".equiv %s__crc_%s, %#08lx\n" - : "%s__crc_%s = %#08lx ;\n", - mod_prefix, name, crc); + /* Used as a linker script. */ + printf("%s__crc_%s = 0x%08lx ;\n", mod_prefix, name, crc); } } @@ -651,10 +648,9 @@ void error_with_pos(const char *fmt, ...) static void genksyms_usage(void) { - fputs("Usage:\n" "genksyms [-aAdDTwqhV] > /path/to/.tmp_obj.ver\n" "\n" + fputs("Usage:\n" "genksyms [-adDTwqhV] > /path/to/.tmp_obj.ver\n" "\n" #ifdef __GNU_LIBRARY__ " -a, --arch Select architecture\n" - " -A, --asm Generate assembly rather than linker script\n" " -d, --debug Increment the debug level (repeatable)\n" " -D, --dump Dump expanded symbol defs (for debugging only)\n" " -r, --reference file Read reference symbols from a file\n" @@ -666,7 +662,6 @@ static void genksyms_usage(void) " -V, --version Print the release version\n" #else /* __GNU_LIBRARY__ */ " -a Select architecture\n" - " -A Generate assembly rather than linker script\n" " -d Increment the debug level (repeatable)\n" " -D Dump expanded symbol defs (for debugging only)\n" " -r file Read reference symbols from a file\n" @@ -688,7 +683,6 @@ int main(int argc, char **argv) #ifdef __GNU_LIBRARY__ struct option long_opts[] = { {"arch", 1, 0, 'a'}, - {"asm", 0, 0, 'A'}, {"debug", 0, 0, 'd'}, {"warnings", 0, 0, 'w'}, {"quiet", 0, 0, 'q'}, @@ -701,10 +695,10 @@ int main(int argc, char **argv) {0, 0, 0, 0} }; - while ((o = getopt_long(argc, argv, "a:dwqVADr:T:ph", + while ((o = getopt_long(argc, argv, "a:dwqVDr:T:ph", &long_opts[0], NULL)) != EOF) #else /* __GNU_LIBRARY__ */ - while ((o = getopt(argc, argv, "a:dwqVADr:T:ph")) != EOF) + while ((o = getopt(argc, argv, "a:dwqVDr:T:ph")) != EOF) #endif /* __GNU_LIBRARY__ */ switch (o) { case 'a': @@ -722,9 +716,6 @@ int main(int argc, char **argv) case 'V': fputs("genksyms version 2.5.60\n", stderr); break; - case 'A': - flag_asm = 1; - break; case 'D': flag_dump_defs = 1; break; diff --git a/scripts/genksyms/keywords.c_shipped b/scripts/genksyms/keywords.c_shipped index 83484fe93ed..971e0113ae7 100644 --- a/scripts/genksyms/keywords.c_shipped +++ b/scripts/genksyms/keywords.c_shipped @@ -1,4 +1,4 @@ -/* ANSI-C code produced by gperf version 3.0.1 */ +/* ANSI-C code produced by gperf version 3.0.2 */ /* Command-line: gperf -L ANSI-C -a -C -E -g -H is_reserved_hash -k '1,3,$' -N is_reserved_word -p -t scripts/genksyms/keywords.gperf */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ @@ -32,7 +32,7 @@ #line 3 "scripts/genksyms/keywords.gperf" struct resword { const char *name; int token; }; -/* maximum key range = 64, duplicates = 0 */ +/* maximum key range = 62, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -46,32 +46,32 @@ is_reserved_hash (register const char *str, register unsigned int len) { static const unsigned char asso_values[] = { - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 0, - 67, 67, 67, 67, 67, 67, 15, 67, 67, 67, - 0, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 0, 67, 0, 67, 5, - 25, 20, 15, 30, 67, 15, 67, 67, 10, 0, - 10, 40, 20, 67, 10, 5, 0, 10, 15, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67 + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 5, + 65, 65, 65, 65, 65, 65, 35, 65, 65, 65, + 0, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 0, 65, 0, 65, 5, + 20, 15, 10, 30, 65, 15, 65, 65, 20, 0, + 10, 35, 20, 65, 10, 5, 0, 10, 5, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65 }; return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]]; } @@ -84,119 +84,116 @@ is_reserved_word (register const char *str, register unsigned int len) { enum { - TOTAL_KEYWORDS = 45, + TOTAL_KEYWORDS = 43, MIN_WORD_LENGTH = 3, MAX_WORD_LENGTH = 24, MIN_HASH_VALUE = 3, - MAX_HASH_VALUE = 66 + MAX_HASH_VALUE = 64 }; static const struct resword wordlist[] = { {""}, {""}, {""}, -#line 28 "scripts/genksyms/keywords.gperf" +#line 26 "scripts/genksyms/keywords.gperf" {"asm", ASM_KEYW}, {""}, -#line 10 "scripts/genksyms/keywords.gperf" +#line 8 "scripts/genksyms/keywords.gperf" {"__asm", ASM_KEYW}, {""}, -#line 11 "scripts/genksyms/keywords.gperf" +#line 9 "scripts/genksyms/keywords.gperf" {"__asm__", ASM_KEYW}, {""}, {""}, -#line 54 "scripts/genksyms/keywords.gperf" +#line 52 "scripts/genksyms/keywords.gperf" {"__typeof__", TYPEOF_KEYW}, {""}, -#line 14 "scripts/genksyms/keywords.gperf" +#line 12 "scripts/genksyms/keywords.gperf" {"__const", CONST_KEYW}, -#line 13 "scripts/genksyms/keywords.gperf" +#line 11 "scripts/genksyms/keywords.gperf" {"__attribute__", ATTRIBUTE_KEYW}, -#line 15 "scripts/genksyms/keywords.gperf" +#line 13 "scripts/genksyms/keywords.gperf" {"__const__", CONST_KEYW}, -#line 20 "scripts/genksyms/keywords.gperf" +#line 18 "scripts/genksyms/keywords.gperf" {"__signed__", SIGNED_KEYW}, -#line 46 "scripts/genksyms/keywords.gperf" +#line 44 "scripts/genksyms/keywords.gperf" {"static", STATIC_KEYW}, - {""}, -#line 41 "scripts/genksyms/keywords.gperf" +#line 20 "scripts/genksyms/keywords.gperf" + {"__volatile__", VOLATILE_KEYW}, +#line 39 "scripts/genksyms/keywords.gperf" {"int", INT_KEYW}, -#line 34 "scripts/genksyms/keywords.gperf" +#line 32 "scripts/genksyms/keywords.gperf" {"char", CHAR_KEYW}, -#line 35 "scripts/genksyms/keywords.gperf" +#line 33 "scripts/genksyms/keywords.gperf" {"const", CONST_KEYW}, -#line 47 "scripts/genksyms/keywords.gperf" +#line 45 "scripts/genksyms/keywords.gperf" {"struct", STRUCT_KEYW}, -#line 26 "scripts/genksyms/keywords.gperf" +#line 24 "scripts/genksyms/keywords.gperf" {"__restrict__", RESTRICT_KEYW}, -#line 27 "scripts/genksyms/keywords.gperf" - {"restrict", RESTRICT_KEYW}, -#line 7 "scripts/genksyms/keywords.gperf" - {"EXPORT_SYMBOL_GPL_FUTURE", EXPORT_SYMBOL_KEYW}, -#line 18 "scripts/genksyms/keywords.gperf" - {"__inline__", INLINE_KEYW}, - {""}, -#line 22 "scripts/genksyms/keywords.gperf" - {"__volatile__", VOLATILE_KEYW}, -#line 5 "scripts/genksyms/keywords.gperf" - {"EXPORT_SYMBOL", EXPORT_SYMBOL_KEYW}, #line 25 "scripts/genksyms/keywords.gperf" + {"restrict", RESTRICT_KEYW}, +#line 23 "scripts/genksyms/keywords.gperf" {"_restrict", RESTRICT_KEYW}, - {""}, -#line 12 "scripts/genksyms/keywords.gperf" - {"__attribute", ATTRIBUTE_KEYW}, -#line 6 "scripts/genksyms/keywords.gperf" - {"EXPORT_SYMBOL_GPL", EXPORT_SYMBOL_KEYW}, #line 16 "scripts/genksyms/keywords.gperf" + {"__inline__", INLINE_KEYW}, +#line 10 "scripts/genksyms/keywords.gperf" + {"__attribute", ATTRIBUTE_KEYW}, + {""}, +#line 14 "scripts/genksyms/keywords.gperf" {"__extension__", EXTENSION_KEYW}, -#line 37 "scripts/genksyms/keywords.gperf" +#line 35 "scripts/genksyms/keywords.gperf" {"enum", ENUM_KEYW}, -#line 8 "scripts/genksyms/keywords.gperf" - {"EXPORT_UNUSED_SYMBOL", EXPORT_SYMBOL_KEYW}, -#line 38 "scripts/genksyms/keywords.gperf" +#line 19 "scripts/genksyms/keywords.gperf" + {"__volatile", VOLATILE_KEYW}, +#line 36 "scripts/genksyms/keywords.gperf" {"extern", EXTERN_KEYW}, {""}, -#line 19 "scripts/genksyms/keywords.gperf" +#line 17 "scripts/genksyms/keywords.gperf" {"__signed", SIGNED_KEYW}, -#line 9 "scripts/genksyms/keywords.gperf" - {"EXPORT_UNUSED_SYMBOL_GPL", EXPORT_SYMBOL_KEYW}, -#line 49 "scripts/genksyms/keywords.gperf" - {"union", UNION_KEYW}, -#line 53 "scripts/genksyms/keywords.gperf" +#line 7 "scripts/genksyms/keywords.gperf" + {"EXPORT_SYMBOL_GPL_FUTURE", EXPORT_SYMBOL_KEYW}, + {""}, +#line 51 "scripts/genksyms/keywords.gperf" {"typeof", TYPEOF_KEYW}, -#line 48 "scripts/genksyms/keywords.gperf" +#line 46 "scripts/genksyms/keywords.gperf" {"typedef", TYPEDEF_KEYW}, -#line 17 "scripts/genksyms/keywords.gperf" +#line 15 "scripts/genksyms/keywords.gperf" {"__inline", INLINE_KEYW}, -#line 33 "scripts/genksyms/keywords.gperf" +#line 31 "scripts/genksyms/keywords.gperf" {"auto", AUTO_KEYW}, -#line 21 "scripts/genksyms/keywords.gperf" - {"__volatile", VOLATILE_KEYW}, +#line 47 "scripts/genksyms/keywords.gperf" + {"union", UNION_KEYW}, {""}, {""}, -#line 50 "scripts/genksyms/keywords.gperf" +#line 48 "scripts/genksyms/keywords.gperf" {"unsigned", UNSIGNED_KEYW}, - {""}, -#line 44 "scripts/genksyms/keywords.gperf" +#line 49 "scripts/genksyms/keywords.gperf" + {"void", VOID_KEYW}, +#line 42 "scripts/genksyms/keywords.gperf" {"short", SHORT_KEYW}, -#line 40 "scripts/genksyms/keywords.gperf" + {""}, {""}, +#line 50 "scripts/genksyms/keywords.gperf" + {"volatile", VOLATILE_KEYW}, + {""}, +#line 37 "scripts/genksyms/keywords.gperf" + {"float", FLOAT_KEYW}, +#line 34 "scripts/genksyms/keywords.gperf" + {"double", DOUBLE_KEYW}, + {""}, +#line 5 "scripts/genksyms/keywords.gperf" + {"EXPORT_SYMBOL", EXPORT_SYMBOL_KEYW}, + {""}, {""}, +#line 38 "scripts/genksyms/keywords.gperf" {"inline", INLINE_KEYW}, +#line 6 "scripts/genksyms/keywords.gperf" + {"EXPORT_SYMBOL_GPL", EXPORT_SYMBOL_KEYW}, +#line 41 "scripts/genksyms/keywords.gperf" + {"register", REGISTER_KEYW}, {""}, -#line 52 "scripts/genksyms/keywords.gperf" - {"volatile", VOLATILE_KEYW}, -#line 42 "scripts/genksyms/keywords.gperf" - {"long", LONG_KEYW}, -#line 24 "scripts/genksyms/keywords.gperf" +#line 22 "scripts/genksyms/keywords.gperf" {"_Bool", BOOL_KEYW}, - {""}, {""}, #line 43 "scripts/genksyms/keywords.gperf" - {"register", REGISTER_KEYW}, -#line 51 "scripts/genksyms/keywords.gperf" - {"void", VOID_KEYW}, -#line 39 "scripts/genksyms/keywords.gperf" - {"float", FLOAT_KEYW}, -#line 36 "scripts/genksyms/keywords.gperf" - {"double", DOUBLE_KEYW}, - {""}, {""}, {""}, {""}, -#line 45 "scripts/genksyms/keywords.gperf" - {"signed", SIGNED_KEYW} + {"signed", SIGNED_KEYW}, + {""}, {""}, +#line 40 "scripts/genksyms/keywords.gperf" + {"long", LONG_KEYW} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) diff --git a/scripts/genksyms/keywords.gperf b/scripts/genksyms/keywords.gperf index 8abe7ab8d88..5ef3733225f 100644 --- a/scripts/genksyms/keywords.gperf +++ b/scripts/genksyms/keywords.gperf @@ -5,8 +5,6 @@ struct resword { const char *name; int token; } EXPORT_SYMBOL, EXPORT_SYMBOL_KEYW EXPORT_SYMBOL_GPL, EXPORT_SYMBOL_KEYW EXPORT_SYMBOL_GPL_FUTURE, EXPORT_SYMBOL_KEYW -EXPORT_UNUSED_SYMBOL, EXPORT_SYMBOL_KEYW -EXPORT_UNUSED_SYMBOL_GPL, EXPORT_SYMBOL_KEYW __asm, ASM_KEYW __asm__, ASM_KEYW __attribute, ATTRIBUTE_KEYW diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 92758120a76..ad2434b2697 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -130,9 +130,18 @@ static int read_symbol(FILE *in, struct sym_entry *s) static int symbol_valid(struct sym_entry *s) { /* Symbols which vary between passes. Passes 1 and 2 must have - * identical symbol lists. + * identical symbol lists. The kallsyms_* symbols below are only added + * after pass 1, they would be included in pass 2 when --all-symbols is + * specified so exclude them to get a stable symbol list. */ static char *special_symbols[] = { + "kallsyms_addresses", + "kallsyms_num_syms", + "kallsyms_names", + "kallsyms_markers", + "kallsyms_token_table", + "kallsyms_token_index", + /* Exclude linker generated symbols which vary between passes */ "_SDA_BASE_", /* ppc */ "_SDA2_BASE_", /* ppc */ @@ -164,9 +173,7 @@ static int symbol_valid(struct sym_entry *s) } /* Exclude symbols which vary between passes. */ - if (strstr((char *)s->sym + offset, "_compiled.") || - strncmp((char*)s->sym + offset, "__compound_literal.", 19) == 0 || - strncmp((char*)s->sym + offset, "__compound_literal$", 19) == 0) + if (strstr((char *)s->sym + offset, "_compiled.")) return 0; for (i = 0; special_symbols[i]; i++) @@ -543,10 +550,8 @@ int main(int argc, char **argv) usage(); read_map(stdin); - if (table_cnt) { - sort_symbols(); - optimize_token_table(); - } + sort_symbols(); + optimize_token_table(); write_src(); return 0; diff --git a/scripts/mksysmap b/scripts/mksysmap index 1db316a3712..6e133a0bae7 100644 --- a/scripts/mksysmap +++ b/scripts/mksysmap @@ -37,6 +37,9 @@ # readprofile starts reading symbols when _stext is found, and # continue until it finds a symbol which is not either of 'T', 't', -# 'W' or 'w'. +# 'W' or 'w'. __crc_ are 'A' and placed in the middle +# so we just ignore them to let readprofile continue to work. +# (At least sparc64 has __crc_ in the middle). + +$NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)' > $2 -$NM -n $1 | grep -v '\( [aNUw] \)\|\( \$[adt]\)' > $2 diff --git a/scripts/strip-symbols b/scripts/strip-symbols deleted file mode 100644 index 29ee8c1a014..00000000000 --- a/scripts/strip-symbols +++ /dev/null @@ -1,22 +0,0 @@ -<*> -*.h -__compound_literal[$.][0-9]* -__crc_[a-zA-Z_]* -__exitcall_[a-zA-Z_]* -__func__[$.][0-9]* -__FUNCTION__[$.][0-9]* -gcc[0-9]_compiled[$.] -__initcall_[a-zA-Z_]* -__kcrctab_[a-zA-Z_]* -__kstrtab_[a-zA-Z_]* -__ksymtab_[a-zA-Z_]* -__mod_[a-zA-Z_]*[0-9] -__module_depends -__param_[a-zA-Z_]* -__pci_fixup_*PCI_ANY_IDPCI_ANY_ID* -__pci_fixup_*PCI_ANY_IDPCI_DEVICE_ID_* -__pci_fixup_*PCI_VENDOR_ID_*PCI_ANY_ID* -__pci_fixup_*PCI_VENDOR_ID_*PCI_DEVICE_ID_* -__PRETTY_FUNCTION__[$.][0-9]* -__setup_[a-zA-Z_]* -____versions -- cgit v1.2.3-70-g09d2 From 4a13ad0bd8869bb491c67918662f9b1852595af5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:28:51 +0000 Subject: x86: avoid early crash in disable_local_APIC() E.g. when called due to an early panic. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f567..d19aa3aab62 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -895,6 +895,10 @@ void disable_local_APIC(void) { unsigned int value; + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + clear_local_APIC(); /* -- cgit v1.2.3-70-g09d2 From 18c07cf530cf4aa8b7551801f68ed40db5ee4e45 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 15 Jan 2009 09:51:20 -0600 Subject: x86, UV: cpu_relax in uv_wait_completion The function uv_wait_completion() spins on reads of a memory-mapped register, waiting for completion of BAU hardware replies. It should call "cpu_relax()" between those reads to improve performance on hyperthreaded configurations. Signed-off-by: Cliff Wickman Acked-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f885023167e..6812b829ed8 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -200,6 +200,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc, destination_timeouts = 0; } } + cpu_relax(); } return FLUSH_COMPLETE; } -- cgit v1.2.3-70-g09d2 From a3c6018e565dc07cf3738ace6bbe412f97b1bba8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 16 Jan 2009 11:59:33 +0000 Subject: x86: fix assumed to be contiguous leaf page tables for kmap_atomic region (take 2) Debugging and original patch from Nick Piggin The early fixmap pmd entry inserted at the very top of the KVA is causing the subsequent fixmap mapping code to not provide physically linear pte pages over the kmap atomic portion of the fixmap (which relies on said property to calculate pte addresses). This has caused weird boot failures in kmap_atomic much later in the boot process (initial userspace faults) on a 32-bit PAE system with a larger number of CPUs (smaller CPU counts tend not to run over into the next page so don't show up the problem). Solve this by attempting to clear out the page table, and copy any of its entries to the new one. Also, add a bug if a nonlinear condition is encountered and can't be resolved, which might save some hours of debugging if this fragile scheme ever breaks again... Once we have such logic, we can also use it to eliminate the early ioremap trickery around the page table setup for the fixmap area. This also fixes potential issues with FIX_* entries sharing the leaf page table with the early ioremap ones getting discarded by early_ioremap_clear() and not restored by early_ioremap_reset(). It at once eliminates the temporary (and configuration, namely NR_CPUS, dependent) unavailability of early fixed mappings during the time the fixmap area page tables get constructed. Finally, also replace the hard coded calculation of the initial table space needed for the fixmap area with a proper one, allowing kernels configured for large CPU counts to actually boot. Based-on: Nick Piggin Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 1 - arch/x86/mm/init_32.c | 48 ++++++++++++++++++++++++++++++++++++++++++++--- arch/x86/mm/ioremap.c | 25 ------------------------ 3 files changed, 45 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 05cfed4485f..1dbbdf4be9b 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -99,7 +99,6 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); * A boot-time mapping is currently limited to at most 16 pages. */ extern void early_ioremap_init(void); -extern void early_ioremap_clear(void); extern void early_ioremap_reset(void); extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); extern void __iomem *early_memremap(unsigned long offset, unsigned long size); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 88f1b10de3b..2cef0507441 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -138,6 +138,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } +static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, + unsigned long vaddr, pte_t *lastpte) +{ +#ifdef CONFIG_HIGHMEM + /* + * Something (early fixmap) may already have put a pte + * page here, which causes the page table allocation + * to become nonlinear. Attempt to fix it, and if it + * is still nonlinear then we have to bug. + */ + int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; + int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; + + if (pmd_idx_kmap_begin != pmd_idx_kmap_end + && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin + && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end + && ((__pa(pte) >> PAGE_SHIFT) < table_start + || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { + pte_t *newpte; + int i; + + BUG_ON(after_init_bootmem); + newpte = alloc_low_page(); + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(newpte + i, pte[i]); + + paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); + BUG_ON(newpte != pte_offset_kernel(pmd, 0)); + __flush_tlb_all(); + + paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); + pte = newpte; + } + BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) + && vaddr > fix_to_virt(FIX_KMAP_END) + && lastpte && lastpte + PTRS_PER_PTE != pte); +#endif + return pte; +} + /* * This function initializes a certain range of kernel virtual memory * with new bootmem page tables, everywhere page tables are missing in @@ -154,6 +195,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) unsigned long vaddr; pgd_t *pgd; pmd_t *pmd; + pte_t *pte = NULL; vaddr = start; pgd_idx = pgd_index(vaddr); @@ -165,7 +207,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) pmd = pmd + pmd_index(vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - one_page_table_init(pmd); + pte = page_table_kmap_check(one_page_table_init(pmd), + pmd, vaddr, pte); vaddr += PMD_SIZE; } @@ -508,7 +551,6 @@ static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) * Fixed mappings, only the page table structure has to be * created - mappings will be set by set_fixmap(): */ - early_ioremap_clear(); vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; page_table_range_init(vaddr, end, pgd_base); @@ -801,7 +843,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse) tables += PAGE_ALIGN(ptes * sizeof(pte_t)); /* for fixmap */ - tables += PAGE_SIZE * 2; + tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); /* * RED-PEN putting page tables only on node 0 could diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index bd85d42819e..af750ab973b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -557,34 +557,9 @@ void __init early_ioremap_init(void) } } -void __init early_ioremap_clear(void) -{ - pmd_t *pmd; - - if (early_ioremap_debug) - printk(KERN_INFO "early_ioremap_clear()\n"); - - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - pmd_clear(pmd); - paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); - __flush_tlb_all(); -} - void __init early_ioremap_reset(void) { - enum fixed_addresses idx; - unsigned long addr, phys; - pte_t *pte; - after_paging_init = 1; - for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { - addr = fix_to_virt(idx); - pte = early_ioremap_pte(addr); - if (pte_present(*pte)) { - phys = pte_val(*pte) & PAGE_MASK; - set_fixmap(idx, phys); - } - } } static void __init __early_set_fixmap(enum fixed_addresses idx, -- cgit v1.2.3-70-g09d2 From 5a4ccaf37ffece09ef33f1cfec67efa8ee56f967 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 21:15:32 +0100 Subject: kprobes: check CONFIG_FREEZER instead of CONFIG_PM Check CONFIG_FREEZER instead of CONFIG_PM because kprobe booster depends on freeze_processes() and thaw_processes() when CONFIG_PREEMPT=y. This fixes a linkage error which occurs when CONFIG_PREEMPT=y, CONFIG_PM=y and CONFIG_FREEZER=n. Reported-by: Cheng Renquan Signed-off-by: Masami Hiramatsu Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Len Brown --- arch/ia64/kernel/kprobes.c | 2 +- arch/x86/kernel/kprobes.c | 2 +- kernel/kprobes.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index f90be51b112..9adac441ac9 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -870,7 +870,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; ss_probe: -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ ia64_psr(regs)->ri = p->ainsn.slot; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 884d985b8b8..e948b28a5a9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -446,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1b9cbdc0127..7ba8cd9845c 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -123,7 +123,7 @@ static int collect_garbage_slots(void); static int __kprobes check_safety(void) { int ret = 0; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) +#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) ret = freeze_processes(); if (ret == 0) { struct task_struct *p, *q; -- cgit v1.2.3-70-g09d2 From 5d8b532af9e52ea89208f5ef31889f646e67ba28 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Jan 2009 23:09:14 +0100 Subject: ACPI suspend: Fix compilation warnings in drivers/acpi/sleep.c Fix two compilation warnings in drivers/acpi/sleep.c, one triggered by unsetting CONFIG_SUSPEND and the other triggered by unsetting CONFIG_HIBERNATION, by moving some code under the appropriate #ifdefs . Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/acpi/sleep.c | 4 ++-- drivers/acpi/sleep.c | 51 ++++++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95f..a60c1f3bcb8 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -156,11 +156,11 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); + if (strncmp(str, "s4_nonvs", 8) == 0) + acpi_s4_no_nvs(); #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 7e3c609cbef..af85f5ba1be 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -90,31 +90,6 @@ void __init acpi_old_suspend_ordering(void) old_suspend_ordering = true; } -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; -/* - * The ACPI specification wants us to save NVS memory regions during hibernation - * and to restore them during the subsequent resume. However, it is not certain - * if this mechanism is going to work on all machines, so we allow the user to - * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line - * option. - */ -static bool s4_no_nvs; - -void __init acpi_s4_no_nvs(void) -{ - s4_no_nvs = true; -} - /** * acpi_pm_disable_gpes - Disable the GPEs. */ @@ -193,6 +168,18 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { @@ -396,6 +383,20 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. However, it is not certain + * if this mechanism is going to work on all machines, so we allow the user to + * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line + * option. + */ +static bool s4_no_nvs; + +void __init acpi_s4_no_nvs(void) +{ + s4_no_nvs = true; +} + static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; static bool nosigcheck; -- cgit v1.2.3-70-g09d2 From b2b815d80a5c4e5b50be0a98aba8c445ce8f3e1f Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:22:16 -0800 Subject: x86: put trigger in to detect mismatched apic versions Impact: add debug warning Fire off one message if two apic's discovered with different apic versions. (this code is only called during CPU init) The goal of this is to pave the way of the removal of the apic_version[] array. We dont expect any apic version incompatibilities in the x86 landscape of systems [if so we dont handle them very well and probably never will handle deep apic version assymetries well], but it's prudent to have a debug check for one kernel cycle nevertheless. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d19aa3aab62..4b6df2469fe 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1837,6 +1837,11 @@ void __cpuinit generic_processor_info(int apicid, int version) num_processors++; cpu = cpumask_next_zero(-1, cpu_present_mask); + if (version != apic_version[boot_cpu_physical_apicid]) + WARN_ONCE(1, + "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* -- cgit v1.2.3-70-g09d2 From c7f8562a51c2e5dcc1a00a2bdd232b9965ff960d Mon Sep 17 00:00:00 2001 From: Leonardo Potenza Date: Sun, 18 Jan 2009 23:03:56 +0100 Subject: x86: fix section mismatch warnings in kernel/setup_percpu.c The function setup_cpu_local_masks() has been marked __init, in order to remove the following section mismatch messages: WARNING: vmlinux.o(.text+0x3c2c7): Section mismatch in reference from the function setup_cpu_local_masks() to the function .init.text:alloc_bootmem_cpumask_var() The function setup_cpu_local_masks() references the function __init alloc_bootmem_cpumask_var(). This is often because setup_cpu_local_masks lacks a __init annotation or the annotation of alloc_bootmem_cpumask_var is wrong. WARNING: vmlinux.o(.text+0x3c2d3): Section mismatch in reference from the function setup_cpu_local_masks() to the function .init.text:alloc_bootmem_cpumask_var() The function setup_cpu_local_masks() references the function __init alloc_bootmem_cpumask_var(). This is often because setup_cpu_local_masks lacks a __init annotation or the annotation of alloc_bootmem_cpumask_var is wrong. WARNING: vmlinux.o(.text+0x3c2df): Section mismatch in reference from the function setup_cpu_local_masks() to the function .init.text:alloc_bootmem_cpumask_var() The function setup_cpu_local_masks() references the function __init alloc_bootmem_cpumask_var(). This is often because setup_cpu_local_masks lacks a __init annotation or the annotation of alloc_bootmem_cpumask_var is wrong. WARNING: vmlinux.o(.text+0x3c2eb): Section mismatch in reference from the function setup_cpu_local_masks() to the function .init.text:alloc_bootmem_cpumask_var() The function setup_cpu_local_masks() references the function __init alloc_bootmem_cpumask_var(). This is often because setup_cpu_local_masks lacks a __init annotation or the annotation of alloc_bootmem_cpumask_var is wrong. Signed-off-by: Leonardo Potenza Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 55c46074eba..01161077a49 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -136,7 +136,7 @@ static void __init setup_cpu_pda_map(void) #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) +static void __init setup_cpu_local_masks(void) { alloc_bootmem_cpumask_var(&cpu_initialized_mask); alloc_bootmem_cpumask_var(&cpu_callin_mask); -- cgit v1.2.3-70-g09d2 From 72859081851af2bda04117ca3d64206ffa199e5e Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: cpufreq: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write Impact: use new work_on_cpu function to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for drv_read() and drv_write(). Basically converts do_drv_{read,write} into "work_on_cpu" functions that are now called by drv_read and drv_write. Note: This patch basically reverts 50c668d6 which reverted 7503bfba, now that the work_on_cpu() function is more stable. Signed-off-by: Mike Travis Acked-by: Rusty Russell Tested-by: Dieter Ries Tested-by: Maciej Rutecki Cc: Dave Jones Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8f3c95c7e61..5bf2c834a23 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,8 +150,9 @@ struct drv_cmd { u32 val; }; -static void do_drv_read(struct drv_cmd *cmd) +static long do_drv_read(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) default: break; } + return 0; } -static void do_drv_write(struct drv_cmd *cmd) +static long do_drv_write(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) default: break; } + return 0; } static void drv_read(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed_ptr(current, &saved_mask); + work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); } static void drv_write(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - set_cpus_allowed_ptr(current, cpumask_of(i)); - do_drv_write(cmd); + work_on_cpu(i, do_drv_write, cmd); } - - set_cpus_allowed_ptr(current, &saved_mask); - return; } static u32 get_cur_val(const struct cpumask *mask) @@ -366,7 +362,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, +static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; -- cgit v1.2.3-70-g09d2 From bfa318ad52a23f1e303d176b44366cdd2bb71ad2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 15:46:08 +0100 Subject: fix: crash: IP: __bitmap_intersects+0x48/0x73 -tip testing found this crash: > [ 35.258515] calling acpi_cpufreq_init+0x0/0x127 @ 1 > [ 35.264127] BUG: unable to handle kernel NULL pointer dereference at (null) > [ 35.267554] IP: [] __bitmap_intersects+0x48/0x73 > [ 35.267554] PGD 0 > [ 35.267554] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c is still broken: there's no allocation of the variable mask, so we pass in an uninitialized cmd.mask field to drv_read(), which then passes it to the scheduler which then crashes ... Switch it over to the much simpler constant-cpumask-pointers approach. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 5bf2c834a23..4b1c319d30c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_var_t mask; + const struct cpumask *mask; drv_addr_union addr; u32 val; }; @@ -231,6 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } + cmd.mask = mask; drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); @@ -397,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return -ENOMEM; - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, @@ -444,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); + cmd.mask = policy->cpus; else - cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); + cmd.mask = cpumask_of(policy->cpu); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; @@ -473,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, perf->state = next_perf_state; out: - free_cpumask_var(cmd.mask); return result; } -- cgit v1.2.3-70-g09d2 From f5495506c3c1300d249d403c36f92de71920dbeb Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Mon, 19 Jan 2009 13:46:41 -0800 Subject: x86: remove kernel_physical_mapping_init() from init section Impact: fix crash with memory hotplug enabled kernel_physical_mapping_init() is called during memory hotplug so it does not belong in the init section. If the kernel is built with CONFIG_DEBUG_SECTION_MISMATCH=y on the make command line, arch/x86/mm/init_64.c is compiled with the -fno-inline-functions-called-once gcc option defeating inlining of kernel_physical_mapping_init() within init_memory_mapping(). When kernel_physical_mapping_init() is not inlined it is placed in the .init.text section according to the __init in it's current declaration. A later call to kernel_physical_mapping_init() during a memory hotplug operation encounters an int3 trap because the .init.text section memory has been freed. This patch eliminates the crash caused by the int3 trap by moving the non-inlined kernel_physical_mapping_init() from .init.text to .meminit.text. Signed-off-by: Gary Hade Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 23f68e77ad1..e6d36b49025 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -596,7 +596,7 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -static unsigned long __init kernel_physical_mapping_init(unsigned long start, +static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) { -- cgit v1.2.3-70-g09d2 From e0a96129db574d6365e3439d16d88517c437ab33 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 16 Jan 2009 15:22:11 +0100 Subject: x86: use early clobbers in usercopy*.c Impact: fix rare (but currently harmless) miscompile with certain configs and gcc versions Hugh Dickins noticed that strncpy_from_user() was miscompiled in some circumstances with gcc 4.3. Thanks to Hugh's excellent analysis it was easy to track down. Hugh writes: > Try building an x86_64 defconfig 2.6.29-rc1 kernel tree, > except not quite defconfig, switch CONFIG_PREEMPT_NONE=y > and CONFIG_PREEMPT_VOLUNTARY off (because it expands a > might_fault() there, which hides the issue): using a > gcc 4.3.2 (I've checked both openSUSE 11.1 and Fedora 10). > > It generates the following: > > 0000000000000000 <__strncpy_from_user>: > 0: 48 89 d1 mov %rdx,%rcx > 3: 48 85 c9 test %rcx,%rcx > 6: 74 0e je 16 <__strncpy_from_user+0x16> > 8: ac lods %ds:(%rsi),%al > 9: aa stos %al,%es:(%rdi) > a: 84 c0 test %al,%al > c: 74 05 je 13 <__strncpy_from_user+0x13> > e: 48 ff c9 dec %rcx > 11: 75 f5 jne 8 <__strncpy_from_user+0x8> > 13: 48 29 c9 sub %rcx,%rcx > 16: 48 89 c8 mov %rcx,%rax > 19: c3 retq > > Observe that "sub %rcx,%rcx; mov %rcx,%rax", whereas gcc 4.2.1 > (and many other configs) say "sub %rcx,%rdx; mov %rdx,%rax". > Isn't it returning 0 when it ought to be returning strlen? The asm constraints for the strncpy_from_user() result were missing an early clobber, which tells gcc that the last output arguments are written before all input arguments are read. Also add more early clobbers in the rest of the file and fix 32-bit usercopy.c in the same way. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin [ since this API is rarely used and no in-kernel user relies on a 'len' return value (they only rely on negative return values) this miscompile was never noticed in the field. But it's worth fixing it nevertheless. ] Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_32.c | 4 ++-- arch/x86/lib/usercopy_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 4a20b2f9a38..7c8ca91bb9e 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -56,7 +56,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(0b,3b) \ - : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ : "memory"); \ @@ -218,7 +218,7 @@ long strnlen_user(const char __user *s, long n) " .align 4\n" " .long 0b,2b\n" ".previous" - :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) :"0" (n), "1" (s), "2" (0), "3" (mask) :"cc"); return res & mask; diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 64d6c84e635..ec13cb5f17e 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -32,7 +32,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(0b,3b) \ - : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ : "memory"); \ @@ -86,7 +86,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) ".previous\n" _ASM_EXTABLE(0b,3b) _ASM_EXTABLE(1b,2b) - : [size8] "=c"(size), [dst] "=&D" (__d0) + : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); return size; -- cgit v1.2.3-70-g09d2 From 552b8aa4d1edcc1c764ff6f61a7686347a2d1827 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jan 2009 09:31:49 +0100 Subject: Revert "x86: signal: change type of paramter for sys_rt_sigreturn()" This reverts commit 4217458dafaa57d8e26a46f5d05ab8c53cf64191. Justin Madru bisected this commit, it was causing weird Firefox crashes. The reason is that GCC mis-optimizes (re-uses) the on-stack parameters of the calling frame, which corrupts the syscall return pt_regs state and thus corrupts user-space register state. So we go back to the slightly less clean but more optimization-safe method of getting to pt_regs. Also add a comment to explain this. Resolves: http://bugzilla.kernel.org/show_bug.cgi?id=12505 Reported-and-bisected-by: Justin Madru Tested-by: Justin Madru Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscalls.h | 2 +- arch/x86/kernel/signal.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 9c6797c3e56..c0b0bda754e 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); asmlinkage int sys_sigaltstack(unsigned long); asmlinkage unsigned long sys_sigreturn(unsigned long); -asmlinkage int sys_rt_sigreturn(struct pt_regs); +asmlinkage int sys_rt_sigreturn(unsigned long); /* kernel/ioport.c */ asmlinkage long sys_iopl(unsigned long); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 89bb7668041..df0587f24c5 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -632,9 +632,16 @@ badframe: } #ifdef CONFIG_X86_32 -asmlinkage int sys_rt_sigreturn(struct pt_regs regs) +/* + * Note: do not pass in pt_regs directly as with tail-call optimization + * GCC will incorrectly stomp on the caller's frame and corrupt user-space + * register state: + */ +asmlinkage int sys_rt_sigreturn(unsigned long __unused) { - return do_rt_sigreturn(®s); + struct pt_regs *regs = (struct pt_regs *)&__unused; + + return do_rt_sigreturn(regs); } #else /* !CONFIG_X86_32 */ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -- cgit v1.2.3-70-g09d2 From a1e46212a410793d575718818e81ddc442a65283 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 20 Jan 2009 14:20:21 -0800 Subject: x86: fix page attribute corruption with cpa() Impact: fix sporadic slowdowns and warning messages This patch fixes a performance issue reported by Linus on his Nehalem system. While Linus reverted the PAT patch (commit 58dab916dfb57328d50deb0aa9b3fc92efa248ff) which exposed the issue, existing cpa() code can potentially still cause wrong(page attribute corruption) behavior. This patch also fixes the "WARNING: at arch/x86/mm/pageattr.c:560" that various people reported. In 64bit kernel, kernel identity mapping might have holes depending on the available memory and how e820 reports the address range covering the RAM, ACPI, PCI reserved regions. If there is a 2MB/1GB hole in the address range that is not listed by e820 entries, kernel identity mapping will have a corresponding hole in its 1-1 identity mapping. If cpa() happens on the kernel identity mapping which falls into these holes, existing code fails like this: __change_page_attr_set_clr() __change_page_attr() returns 0 because of if (!kpte). But doesn't set cpa->numpages and cpa->pfn. cpa_process_alias() uses uninitialized cpa->pfn (random value) which can potentially lead to changing the page attribute of kernel text/data, kernel identity mapping of RAM pages etc. oops! This bug was easily exposed by another PAT patch which was doing cpa() more often on kernel identity mapping holes (physical range between max_low_pfn_mapped and 4GB), where in here it was setting the cache disable attribute(PCD) for kernel identity mappings aswell. Fix cpa() to handle the kernel identity mapping holes. Retain the WARN() for cpa() calls to other not present address ranges (kernel-text/data, ioremap() addresses) Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Cc: Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e89d24815f2..84ba74820ad 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -534,6 +534,36 @@ out_unlock: return 0; } +static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, + int primary) +{ + /* + * Ignore all non primary paths. + */ + if (!primary) + return 0; + + /* + * Ignore the NULL PTE for kernel identity mapping, as it is expected + * to have holes. + * Also set numpages to '1' indicating that we processed cpa req for + * one virtual address page and its pfn. TBD: numpages can be set based + * on the initial value and the level returned by lookup_address(). + */ + if (within(vaddr, PAGE_OFFSET, + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { + cpa->numpages = 1; + cpa->pfn = __pa(vaddr) >> PAGE_SHIFT; + return 0; + } else { + WARN(1, KERN_WARNING "CPA: called for zero pte. " + "vaddr = %lx cpa->vaddr = %lx\n", vaddr, + *cpa->vaddr); + + return -EFAULT; + } +} + static int __change_page_attr(struct cpa_data *cpa, int primary) { unsigned long address; @@ -549,17 +579,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) repeat: kpte = lookup_address(address, &level); if (!kpte) - return 0; + return __cpa_process_fault(cpa, address, primary); old_pte = *kpte; - if (!pte_val(old_pte)) { - if (!primary) - return 0; - WARN(1, KERN_WARNING "CPA: called for zero pte. " - "vaddr = %lx cpa->vaddr = %lx\n", address, - *cpa->vaddr); - return -EINVAL; - } + if (!pte_val(old_pte)) + return __cpa_process_fault(cpa, address, primary); if (level == PG_LEVEL_4K) { pte_t new_pte; @@ -657,12 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa) vaddr = *cpa->vaddr; if (!(within(vaddr, PAGE_OFFSET, - PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) -#ifdef CONFIG_X86_64 - || within(vaddr, PAGE_OFFSET + (1UL<<32), - PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) -#endif - )) { + PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { alias_cpa = *cpa; temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); -- cgit v1.2.3-70-g09d2 From 731f1872f4e8a0f1eabd49c3548207e79a421202 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 20 Jan 2009 10:37:39 +0100 Subject: x86: mtrr fix debug boot parameter while looking at: http://bugzilla.kernel.org/show_bug.cgi?id=11541 I realized that the mtrr.show param cannot work, because the code is processed much too early. This patch: - Declares mtrr.show as early_param - Stays consistent with the previous param (which I doubt that it ever worked), so mtrr.show=1 would still work - Declares mtrr_show as initdata Signed-off-by: Thomas Renninger Acked-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b59ddcc88cd..0c0a455fe95 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -33,11 +33,13 @@ u64 mtrr_tom2; struct mtrr_state_type mtrr_state = {}; EXPORT_SYMBOL_GPL(mtrr_state); -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "mtrr." - -static int mtrr_show; -module_param_named(show, mtrr_show, bool, 0); +static int __initdata mtrr_show; +static int __init mtrr_debug(char *opt) +{ + mtrr_show = 1; + return 0; +} +early_param("mtrr.show", mtrr_debug); /* * Returns the effective MTRR type for the region -- cgit v1.2.3-70-g09d2 From 9597134218300c045cf219be3664615e97cb239c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 13 Jan 2009 10:21:30 -0800 Subject: x86: fix PTE corruption issue while mapping RAM using /dev/mem Beschorner Daniel reported: > hwinfo problem since 2.6.28, showing this in the oops: > Corrupted page table at address 7fd04de3ec00 Also, PaX Team reported a regression with this commit: > commit 9542ada803198e6eba29d3289abb39ea82047b92 > Author: Suresh Siddha > Date: Wed Sep 24 08:53:33 2008 -0700 > > x86: track memtype for RAM in page struct This commit breaks mapping any RAM page through /dev/mem, as the reserve_memtype() was not initializing the return attribute type and as such corrupting the PTE entry that was setup with the return attribute type. Because of this bug, application mapping this RAM page through /dev/mem will die with "Corrupted page table at address xxxx" message in the kernel log and also the kernel identity mapping which maps the underlying RAM page gets converted to UC. Fix this by initializing the return attribute type before calling reserve_ram_pages_type() Reported-by: PaX Team Reported-and-tested-by: Beschorner Daniel Tested-and-Acked-by: PaX Team Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 070ee4a3b22..ffc88cc00fd 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -333,6 +333,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, req_type & _PAGE_CACHE_MASK); } + if (new_type) + *new_type = actual_type; + /* * For legacy reasons, some parts of the physical address range in the * legacy 1MB region is treated as non-RAM (even when listed as RAM in @@ -356,9 +359,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, new->end = end; new->type = actual_type; - if (new_type) - *new_type = actual_type; - spin_lock(&memtype_lock); if (cached_entry && start >= cached_start) -- cgit v1.2.3-70-g09d2 From bdf21a49bab28f0d9613e8d8724ef9c9168b61b9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 21 Jan 2009 15:01:56 -0800 Subject: x86: add MSR_IA32_MISC_ENABLE bits to Impact: None (new bit definitions currently unused) Add bit definitions for the MSR_IA32_MISC_ENABLE MSRs to . Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr-index.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index cb58643947b..358acc59ae0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -202,6 +202,35 @@ #define MSR_IA32_THERM_STATUS 0x0000019c #define MSR_IA32_MISC_ENABLE 0x000001a0 +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2) +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9) +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) + /* Intel Model 6 */ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 -- cgit v1.2.3-70-g09d2 From 066941bd4eeb159307a5d7d795100d0887c00442 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 21 Jan 2009 15:04:32 -0800 Subject: x86: unmask CPUID levels on Intel CPUs Impact: Fixes crashes with misconfigured BIOSes on XSAVE hardware Avuton Olrich reported early boot crashes with v2.6.28 and bisected it down to dc1e35c6e95e8923cf1d3510438b63c600fee1e2 ("x86, xsave: enable xsave/xrstor on cpus with xsave support"). If the CPUID limit bit in MSR_IA32_MISC_ENABLE is set, clear it to make all CPUID information available. This is required for some features to work, in particular XSAVE. Reported-and-bisected-by: Avuton Olrich Tested-by: Avuton Olrich Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8ea6929e974..43c1dcf0bec 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,6 +29,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { + u64 misc_enable; + + /* Unmask CPUID levels if masked */ + if (!rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_enable) && + (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID)) { + misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + c->cpuid_level = cpuid_eax(0); + } + if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -- cgit v1.2.3-70-g09d2 From 336f6c322d87806ef93afad6308ac65083a865e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Jan 2009 09:50:44 +0100 Subject: debugobjects: add and use INIT_WORK_ON_STACK Impact: Fix debugobjects warning debugobject enabled kernels spit out a warning in hpet code due to a workqueue which is initialized on stack. Add INIT_WORK_ON_STACK() which calls init_timer_on_stack() and use it in hpet. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 3 ++- include/linux/workqueue.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad9069..64d5ad0b8ad 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); wait_for_completion(&work.complete); + destroy_timer_on_stack(&work.work.timer); break; case CPU_DEAD: if (hdev) { diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b36291130f2..20b59eb1fac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -124,6 +124,12 @@ struct execute_work { init_timer_deferrable(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_on_stack(&(_work)->timer); \ + } while (0) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question -- cgit v1.2.3-70-g09d2 From ba2607fe9c1f2d4ad5a3d4c4ae9117c5bfdca826 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:38:35 +0100 Subject: x86, ds, bts: cleanup/fix DS configuration Cleanup the cpuid check for DS configuration. This also fixes a Corei7 CPUID enumeration bug. Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index da91701a234..169a120587b 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ @@ -890,7 +890,7 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) } static const struct ds_configuration ds_cfg_netburst = { - .name = "netburst", + .name = "Netburst", .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), @@ -904,7 +904,7 @@ static const struct ds_configuration ds_cfg_netburst = { #endif }; static const struct ds_configuration ds_cfg_pentium_m = { - .name = "pentium m", + .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .sizeof_field = sizeof(long), @@ -915,8 +915,8 @@ static const struct ds_configuration ds_cfg_pentium_m = { .sizeof_rec[ds_pebs] = sizeof(long) * 18, #endif }; -static const struct ds_configuration ds_cfg_core2 = { - .name = "core 2", +static const struct ds_configuration ds_cfg_core2_atom = { + .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), @@ -949,19 +949,22 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { - case 0 ... 0xC: - /* sorry, don't know about them */ - break; - case 0xD: - case 0xE: /* Pentium M */ + case 0x9: + case 0xd: /* Pentium M */ ds_configure(&ds_cfg_pentium_m); break; - default: /* Core2, Atom, ... */ - ds_configure(&ds_cfg_core2); + case 0xf: + case 0x17: /* Core2 */ + case 0x1c: /* Atom */ + ds_configure(&ds_cfg_core2_atom); + break; + case 0x1a: /* i7 */ + default: + /* sorry, don't know about them */ break; } break; - case 0xF: + case 0xf: switch (c->x86_model) { case 0x0: case 0x1: -- cgit v1.2.3-70-g09d2 From 42ef73fe134732b2e91c0326df5fd568da17c4b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 23 Jan 2009 17:37:49 +0100 Subject: x86, mm: fix pte_free() On -rt we were seeing spurious bad page states like: Bad page state in process 'firefox' page:c1bc2380 flags:0x40000000 mapping:c1bc2390 mapcount:0 count:0 Trying to fix it up, but a reboot is needed Backtrace: Pid: 503, comm: firefox Not tainted 2.6.26.8-rt13 #3 [] ? printk+0x14/0x19 [] bad_page+0x4e/0x79 [] free_hot_cold_page+0x5b/0x1d3 [] free_hot_page+0xf/0x11 [] __free_pages+0x20/0x2b [] __pte_alloc+0x87/0x91 [] handle_mm_fault+0xe4/0x733 [] ? rt_mutex_down_read_trylock+0x57/0x63 [] ? rt_mutex_down_read_trylock+0x57/0x63 [] do_page_fault+0x36f/0x88a This is the case where a concurrent fault already installed the PTE and we get to free the newly allocated one. This is due to pgtable_page_ctor() doing the spin_lock_init(&page->ptl) which is overlaid with the {private, mapping} struct. union { struct { unsigned long private; struct address_space *mapping; }; spinlock_t ptl; struct kmem_cache *slab; struct page *first_page; }; Normally the spinlock is small enough to not stomp on page->mapping, but PREEMPT_RT=y has huge 'spin'locks. But lockdep kernels should also be able to trigger this splat, as the lock tracking code grows the spinlock to cover page->mapping. The obvious fix is calling pgtable_page_dtor() like the regular pte free path __pte_free_tlb() does. It seems all architectures except x86 and nm10300 already do this, and nm10300 doesn't seem to use pgtable_page_ctor(), which suggests it doesn't do SMP or simply doesnt do MMU at all or something. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Cc: --- arch/x86/include/asm/pgalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index cb7c151a8bf..dd14c54ac71 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -42,6 +42,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } -- cgit v1.2.3-70-g09d2 From e1b4d1143651fb3838be1117785b6e0386fa151f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 25 Jan 2009 16:57:00 +0100 Subject: x86: use standard PIT frequency the RDC and ELAN platforms use slighly different PIT clocks, resulting in a timex.h hack that changes PIT_TICK_RATE during build time. But if a tester enables any of these platform support .config options, the PIT will be miscalibrated on standard PC platforms. So use one frequency - in a subsequent patch we'll add a quirk to allow x86 platforms to define different PIT frequencies. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/timex.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index 1287dc1347d..b5c9d45c981 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -1,18 +1,13 @@ -/* x86 architecture timex specifications */ #ifndef _ASM_X86_TIMEX_H #define _ASM_X86_TIMEX_H #include #include -#ifdef CONFIG_X86_ELAN -# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ -#elif defined(CONFIG_X86_RDC321X) -# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ -#else -# define PIT_TICK_RATE 1193182 /* Underlying HZ */ -#endif -#define CLOCK_TICK_RATE PIT_TICK_RATE +/* The PIT ticks at this frequency (in HZ): */ +#define PIT_TICK_RATE 1193182 + +#define CLOCK_TICK_RATE PIT_TICK_RATE #define ARCH_HAS_READ_CURRENT_TIMER -- cgit v1.2.3-70-g09d2 From ef5fa0ab24b87646c7bc98645acbb4b51fc2acd4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 23 Jan 2009 14:14:21 -0800 Subject: x86: work around PAGE_KERNEL_WC not getting WC in iomap_atomic_prot_pfn. In the absence of PAT, PAGE_KERNEL_WC ends up mapping to a memory type that gets UC behavior even in the presence of a WC MTRR covering the area in question. By swapping to PAGE_KERNEL_UC_MINUS, we can get the actual behavior the caller wanted (WC if you can manage it, UC otherwise). This recovers the 40% performance improvement of using WC in the DRM to upload vertex data. Signed-off-by: Eric Anholt Signed-off-by: H. Peter Anvin --- arch/x86/mm/iomap_32.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index d0151d8ce45..ca53224fc56 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -17,6 +17,7 @@ */ #include +#include #include /* Map 'pfn' using fixed map 'type' and protections 'prot' @@ -29,6 +30,15 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) pagefault_disable(); + /* + * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. + * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the + * MTRR is UC or WC. UC_MINUS gets the real intention, of the + * user, which is "WC if the MTRR is WC, UC if you can't do that." + */ + if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) + prot = PAGE_KERNEL_UC_MINUS; + idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); -- cgit v1.2.3-70-g09d2 From 99fb4d349db7e7dacb2099c5cc320a9e2d31c1ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 04:30:41 +0100 Subject: x86: unmask CPUID levels on Intel CPUs, fix Impact: fix boot hang on pre-model-15 Intel CPUs rdmsrl_safe() does not work in very early bootup code yet, because we dont have the pagefault handler installed yet so exception section does not get parsed. rdmsr_safe() will just crash and hang the bootup. So limit the MSR_IA32_MISC_ENABLE MSR read to those CPU types that support it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 43c1dcf0bec..549f2ada55f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -29,14 +29,17 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - u64 misc_enable; - - /* Unmask CPUID levels if masked */ - if (!rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_enable) && - (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID)) { - misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - c->cpuid_level = cpuid_eax(0); + /* Unmask CPUID levels if masked: */ + if (c->x86 == 6 && c->x86_model >= 15) { + u64 misc_enable; + + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + + if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { + misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + c->cpuid_level = cpuid_eax(0); + } } if ((c->x86 == 0xf && c->x86_model >= 0x03) || -- cgit v1.2.3-70-g09d2 From 659d2618b38f8a4d91bdb19cfc5c7fb330a4c55a Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Sat, 24 Jan 2009 01:46:03 +0600 Subject: x86: fix section mismatch warning Here function vmi_activate calls a init function activate_vmi , which causes the following section mismatch warnings: LD arch/x86/kernel/built-in.o WARNING: arch/x86/kernel/built-in.o(.text+0x13ba9): Section mismatch in reference from the function vmi_activate() to the function .init.text:vmi_time_init() The function vmi_activate() references the function __init vmi_time_init(). This is often because vmi_activate lacks a __init annotation or the annotation of vmi_time_init is wrong. WARNING: arch/x86/kernel/built-in.o(.text+0x13bd1): Section mismatch in reference from the function vmi_activate() to the function .devinit.text:vmi_time_bsp_init() The function vmi_activate() references the function __devinit vmi_time_bsp_init(). This is often because vmi_activate lacks a __devinit annotation or the annotation of vmi_time_bsp_init is wrong. WARNING: arch/x86/kernel/built-in.o(.text+0x13bdb): Section mismatch in reference from the function vmi_activate() to the function .devinit.text:vmi_time_ap_init() The function vmi_activate() references the function __devinit vmi_time_ap_init(). This is often because vmi_activate lacks a __devinit annotation or the annotation of vmi_time_ap_init is wrong. Fix it by marking vmi_activate() as __init too. Signed-off-by: Rakib Mullick Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 23206ba1687..1d3302cc2dd 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -858,7 +858,7 @@ void __init vmi_init(void) #endif } -void vmi_activate(void) +void __init vmi_activate(void) { unsigned long flags; -- cgit v1.2.3-70-g09d2 From 30a0fb947a68ad3ab8a7184e3b3d79dce10e3688 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 26 Jan 2009 09:40:58 -0800 Subject: x86: correct the CPUID pattern for MSR_IA32_MISC_ENABLE availability Impact: re-enable CPUID unmasking on affected processors As far as I am capable of discerning from the documentation, MSR_IA32_MISC_ENABLE should be available for all family 0xf CPUs, as well as family 6 for model >= 0xd (newer Pentium M). The documentation on this isn't ideal, so we need to be on the lookout for errors, still. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 549f2ada55f..430e5c38a54 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -30,7 +30,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Unmask CPUID levels if masked: */ - if (c->x86 == 6 && c->x86_model >= 15) { + if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { u64 misc_enable; rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); -- cgit v1.2.3-70-g09d2 From dd7f8dbe2b3c0611ba969cd867c10cb63d163e25 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 26 Jan 2009 21:19:57 +0100 Subject: eeprom: More consistent symbol names Now that all EEPROM drivers live in the same place, let's harmonize their symbol names. Also fix eeprom's dependencies, it definitely needs sysfs, and is no longer experimental after many years in the kernel tree. Signed-off-by: Jean Delvare Acked-by: Wolfram Sang Cc: David Brownell --- arch/arm/configs/afeb9260_defconfig | 6 +++--- arch/arm/configs/ams_delta_defconfig | 2 +- arch/arm/configs/at91cap9adk_defconfig | 4 ++-- arch/arm/configs/at91rm9200dk_defconfig | 2 +- arch/arm/configs/at91rm9200ek_defconfig | 2 +- arch/arm/configs/at91sam9260ek_defconfig | 2 +- arch/arm/configs/at91sam9261ek_defconfig | 4 ++-- arch/arm/configs/at91sam9263ek_defconfig | 4 ++-- arch/arm/configs/at91sam9g20ek_defconfig | 2 +- arch/arm/configs/at91sam9rlek_defconfig | 4 ++-- arch/arm/configs/ateb9200_defconfig | 2 +- arch/arm/configs/badge4_defconfig | 2 +- arch/arm/configs/cam60_defconfig | 4 ++-- arch/arm/configs/cm_x300_defconfig | 4 ++-- arch/arm/configs/colibri_defconfig | 2 +- arch/arm/configs/corgi_defconfig | 6 +++--- arch/arm/configs/csb337_defconfig | 2 +- arch/arm/configs/csb637_defconfig | 2 +- arch/arm/configs/ecbat91_defconfig | 4 ++-- arch/arm/configs/ep93xx_defconfig | 2 +- arch/arm/configs/ezx_defconfig | 4 ++-- arch/arm/configs/footbridge_defconfig | 2 +- arch/arm/configs/iop13xx_defconfig | 2 +- arch/arm/configs/iop32x_defconfig | 2 +- arch/arm/configs/iop33x_defconfig | 2 +- arch/arm/configs/ixp2000_defconfig | 2 +- arch/arm/configs/ixp23xx_defconfig | 2 +- arch/arm/configs/ixp4xx_defconfig | 2 +- arch/arm/configs/kafa_defconfig | 2 +- arch/arm/configs/kirkwood_defconfig | 6 +++--- arch/arm/configs/loki_defconfig | 4 ++-- arch/arm/configs/magician_defconfig | 2 +- arch/arm/configs/msm_defconfig | 2 +- arch/arm/configs/mv78xx0_defconfig | 2 +- arch/arm/configs/n770_defconfig | 4 ++-- arch/arm/configs/neocore926_defconfig | 6 +++--- arch/arm/configs/neponset_defconfig | 2 +- arch/arm/configs/omap3_beagle_defconfig | 4 ++-- arch/arm/configs/omap3_pandora_defconfig | 6 +++--- arch/arm/configs/omap_2430sdp_defconfig | 4 ++-- arch/arm/configs/omap_apollon_2420_defconfig | 2 +- arch/arm/configs/omap_generic_1510_defconfig | 2 +- arch/arm/configs/omap_generic_1610_defconfig | 2 +- arch/arm/configs/omap_h2_1610_defconfig | 4 ++-- arch/arm/configs/omap_h4_2420_defconfig | 2 +- arch/arm/configs/omap_innovator_1510_defconfig | 2 +- arch/arm/configs/omap_ldp_defconfig | 6 +++--- arch/arm/configs/omap_osk_5912_defconfig | 2 +- arch/arm/configs/onearm_defconfig | 2 +- arch/arm/configs/orion5x_defconfig | 4 ++-- arch/arm/configs/overo_defconfig | 6 +++--- arch/arm/configs/palmz71_defconfig | 2 +- arch/arm/configs/palmz72_defconfig | 6 +++--- arch/arm/configs/pcm027_defconfig | 2 +- arch/arm/configs/pcm038_defconfig | 4 ++-- arch/arm/configs/picotux200_defconfig | 2 +- arch/arm/configs/pnx4008_defconfig | 2 +- arch/arm/configs/qil-a9260_defconfig | 4 ++-- arch/arm/configs/rpc_defconfig | 2 +- arch/arm/configs/s3c2410_defconfig | 4 ++-- arch/arm/configs/s3c6400_defconfig | 4 ++-- arch/arm/configs/spitz_defconfig | 6 +++--- arch/arm/configs/sx1_defconfig | 2 +- arch/arm/configs/trizeps4_defconfig | 2 +- arch/arm/configs/usb-a9260_defconfig | 2 +- arch/arm/configs/usb-a9263_defconfig | 2 +- arch/arm/configs/versatile_defconfig | 2 +- arch/arm/configs/viper_defconfig | 4 ++-- arch/arm/configs/xm_x2xx_defconfig | 4 ++-- arch/arm/configs/yl9200_defconfig | 2 +- arch/avr32/configs/atngw100_defconfig | 6 +++--- arch/avr32/configs/atngw100_evklcd100_defconfig | 4 ++-- arch/avr32/configs/atngw100_evklcd101_defconfig | 4 ++-- arch/avr32/configs/atstk1002_defconfig | 6 +++--- arch/avr32/configs/atstk1003_defconfig | 6 +++--- arch/avr32/configs/atstk1004_defconfig | 2 +- arch/avr32/configs/atstk1006_defconfig | 6 +++--- arch/avr32/configs/favr-32_defconfig | 6 +++--- arch/avr32/configs/hammerhead_defconfig | 6 +++--- arch/avr32/configs/mimc200_defconfig | 6 +++--- arch/blackfin/configs/BF518F-EZBRD_defconfig | 4 ++-- arch/blackfin/configs/BF526-EZBRD_defconfig | 6 +++--- arch/blackfin/configs/BF527-EZKIT_defconfig | 6 +++--- arch/blackfin/configs/BF533-EZKIT_defconfig | 2 +- arch/blackfin/configs/BF533-STAMP_defconfig | 6 +++--- arch/blackfin/configs/BF537-STAMP_defconfig | 6 +++--- arch/blackfin/configs/BF538-EZKIT_defconfig | 6 +++--- arch/blackfin/configs/BF548-EZKIT_defconfig | 6 +++--- arch/blackfin/configs/BF561-EZKIT_defconfig | 2 +- arch/blackfin/configs/BlackStamp_defconfig | 4 ++-- arch/blackfin/configs/CM-BF527_defconfig | 4 ++-- arch/blackfin/configs/CM-BF548_defconfig | 4 ++-- arch/blackfin/configs/H8606_defconfig | 2 +- arch/blackfin/configs/IP0X_defconfig | 2 +- arch/blackfin/configs/PNAV-10_defconfig | 6 +++--- arch/blackfin/configs/SRV1_defconfig | 4 ++-- arch/blackfin/configs/TCM-BF537_defconfig | 2 +- arch/ia64/configs/bigsur_defconfig | 2 +- arch/ia64/configs/zx1_defconfig | 2 +- arch/m32r/configs/m32104ut_defconfig | 2 +- arch/mips/configs/bigsur_defconfig | 2 +- arch/mips/configs/emma2rh_defconfig | 2 +- arch/mips/configs/fulong_defconfig | 4 ++-- arch/mips/configs/msp71xx_defconfig | 2 +- arch/mips/configs/mtx1_defconfig | 4 ++-- arch/mips/configs/pnx8335-stb225_defconfig | 4 ++-- arch/mips/configs/rbtx49xx_defconfig | 2 +- arch/powerpc/configs/44x/sam440ep_defconfig | 4 ++-- arch/powerpc/configs/44x/warp_defconfig | 4 ++-- arch/powerpc/configs/52xx/cm5200_defconfig | 4 ++-- arch/powerpc/configs/52xx/lite5200b_defconfig | 4 ++-- arch/powerpc/configs/52xx/motionpro_defconfig | 4 ++-- arch/powerpc/configs/52xx/pcm030_defconfig | 4 ++-- arch/powerpc/configs/52xx/tqm5200_defconfig | 4 ++-- arch/powerpc/configs/83xx/asp8347_defconfig | 4 ++-- arch/powerpc/configs/83xx/mpc8313_rdb_defconfig | 6 +++--- arch/powerpc/configs/83xx/mpc8315_rdb_defconfig | 6 +++--- arch/powerpc/configs/83xx/mpc832x_mds_defconfig | 4 ++-- arch/powerpc/configs/83xx/mpc832x_rdb_defconfig | 6 +++--- arch/powerpc/configs/83xx/mpc834x_itx_defconfig | 6 +++--- arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig | 6 +++--- arch/powerpc/configs/83xx/mpc834x_mds_defconfig | 4 ++-- arch/powerpc/configs/83xx/mpc836x_mds_defconfig | 4 ++-- arch/powerpc/configs/83xx/mpc836x_rdk_defconfig | 6 +++--- arch/powerpc/configs/83xx/mpc837x_mds_defconfig | 4 ++-- arch/powerpc/configs/83xx/mpc837x_rdb_defconfig | 4 ++-- arch/powerpc/configs/83xx/sbc834x_defconfig | 4 ++-- arch/powerpc/configs/85xx/mpc8536_ds_defconfig | 4 ++-- arch/powerpc/configs/85xx/mpc8544_ds_defconfig | 4 ++-- arch/powerpc/configs/85xx/mpc8568mds_defconfig | 4 ++-- arch/powerpc/configs/85xx/mpc8572_ds_defconfig | 4 ++-- arch/powerpc/configs/85xx/stx_gp3_defconfig | 4 ++-- arch/powerpc/configs/85xx/tqm8540_defconfig | 4 ++-- arch/powerpc/configs/85xx/tqm8541_defconfig | 4 ++-- arch/powerpc/configs/85xx/tqm8548_defconfig | 4 ++-- arch/powerpc/configs/85xx/tqm8555_defconfig | 4 ++-- arch/powerpc/configs/85xx/tqm8560_defconfig | 4 ++-- arch/powerpc/configs/86xx/gef_sbc610_defconfig | 4 ++-- arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig | 4 ++-- arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig | 4 ++-- arch/powerpc/configs/86xx/sbc8641d_defconfig | 4 ++-- arch/powerpc/configs/c2k_defconfig | 4 ++-- arch/powerpc/configs/cell_defconfig | 2 +- arch/powerpc/configs/celleb_defconfig | 2 +- arch/powerpc/configs/chrp32_defconfig | 4 ++-- arch/powerpc/configs/g5_defconfig | 4 ++-- arch/powerpc/configs/linkstation_defconfig | 4 ++-- arch/powerpc/configs/maple_defconfig | 4 ++-- arch/powerpc/configs/mpc5200_defconfig | 4 ++-- arch/powerpc/configs/mpc83xx_defconfig | 4 ++-- arch/powerpc/configs/mpc85xx_defconfig | 4 ++-- arch/powerpc/configs/mpc86xx_defconfig | 4 ++-- arch/powerpc/configs/pasemi_defconfig | 2 +- arch/powerpc/configs/pmac32_defconfig | 4 ++-- arch/powerpc/configs/ppc40x_defconfig | 4 ++-- arch/powerpc/configs/ppc44x_defconfig | 4 ++-- arch/powerpc/configs/ppc64_defconfig | 4 ++-- arch/powerpc/configs/ppc6xx_defconfig | 4 ++-- arch/powerpc/configs/prpmc2800_defconfig | 4 ++-- arch/powerpc/configs/pseries_defconfig | 4 ++-- arch/powerpc/configs/storcenter_defconfig | 4 ++-- arch/sh/configs/cayman_defconfig | 4 ++-- arch/sh/configs/edosk7760_defconfig | 4 ++-- arch/sh/configs/migor_defconfig | 4 ++-- arch/sh/configs/r7780mp_defconfig | 4 ++-- arch/sh/configs/r7785rp_defconfig | 4 ++-- arch/sh/configs/rts7751r2d1_defconfig | 2 +- arch/sh/configs/rts7751r2dplus_defconfig | 2 +- arch/sh/configs/se7343_defconfig | 4 ++-- arch/sh/configs/sh7785lcr_defconfig | 4 ++-- arch/sh/configs/shx3_defconfig | 6 +++--- arch/sparc/configs/sparc64_defconfig | 4 ++-- arch/x86/configs/i386_defconfig | 4 ++-- arch/x86/configs/x86_64_defconfig | 4 ++-- drivers/misc/eeprom/Kconfig | 8 ++++---- drivers/misc/eeprom/Makefile | 6 +++--- 176 files changed, 326 insertions(+), 326 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/configs/afeb9260_defconfig b/arch/arm/configs/afeb9260_defconfig index ce909586a34..f7a272cb3da 100644 --- a/arch/arm/configs/afeb9260_defconfig +++ b/arch/arm/configs/afeb9260_defconfig @@ -719,8 +719,8 @@ CONFIG_I2C_GPIO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=y -# CONFIG_SENSORS_EEPROM is not set +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -744,7 +744,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=y # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/ams_delta_defconfig b/arch/arm/configs/ams_delta_defconfig index 2c4aa11f0b0..764732529ea 100644 --- a/arch/arm/configs/ams_delta_defconfig +++ b/arch/arm/configs/ams_delta_defconfig @@ -767,7 +767,7 @@ CONFIG_I2C_OMAP=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/at91cap9adk_defconfig b/arch/arm/configs/at91cap9adk_defconfig index bf97801a106..bc6bd9f6174 100644 --- a/arch/arm/configs/at91cap9adk_defconfig +++ b/arch/arm/configs/at91cap9adk_defconfig @@ -676,7 +676,7 @@ CONFIG_I2C_CHARDEV=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -703,7 +703,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/at91rm9200dk_defconfig b/arch/arm/configs/at91rm9200dk_defconfig index 868fb7b9530..238b218394e 100644 --- a/arch/arm/configs/at91rm9200dk_defconfig +++ b/arch/arm/configs/at91rm9200dk_defconfig @@ -636,7 +636,7 @@ CONFIG_I2C_GPIO=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/at91rm9200ek_defconfig b/arch/arm/configs/at91rm9200ek_defconfig index de43fc67561..9f7a99ace51 100644 --- a/arch/arm/configs/at91rm9200ek_defconfig +++ b/arch/arm/configs/at91rm9200ek_defconfig @@ -610,7 +610,7 @@ CONFIG_I2C_GPIO=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/at91sam9260ek_defconfig b/arch/arm/configs/at91sam9260ek_defconfig index 38e6a0abeb4..e0ee7060f9a 100644 --- a/arch/arm/configs/at91sam9260ek_defconfig +++ b/arch/arm/configs/at91sam9260ek_defconfig @@ -582,7 +582,7 @@ CONFIG_I2C_GPIO=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/at91sam9261ek_defconfig b/arch/arm/configs/at91sam9261ek_defconfig index 93b779f94b4..01d1ef97d8b 100644 --- a/arch/arm/configs/at91sam9261ek_defconfig +++ b/arch/arm/configs/at91sam9261ek_defconfig @@ -660,7 +660,7 @@ CONFIG_I2C_GPIO=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -687,7 +687,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/at91sam9263ek_defconfig b/arch/arm/configs/at91sam9263ek_defconfig index a7ddd94363c..036a126725c 100644 --- a/arch/arm/configs/at91sam9263ek_defconfig +++ b/arch/arm/configs/at91sam9263ek_defconfig @@ -670,7 +670,7 @@ CONFIG_I2C_GPIO=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -697,7 +697,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/at91sam9g20ek_defconfig b/arch/arm/configs/at91sam9g20ek_defconfig index df0d6ee672b..7e018a04c31 100644 --- a/arch/arm/configs/at91sam9g20ek_defconfig +++ b/arch/arm/configs/at91sam9g20ek_defconfig @@ -665,7 +665,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=y # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/at91sam9rlek_defconfig b/arch/arm/configs/at91sam9rlek_defconfig index 811bebbdc78..237a2a6a851 100644 --- a/arch/arm/configs/at91sam9rlek_defconfig +++ b/arch/arm/configs/at91sam9rlek_defconfig @@ -566,7 +566,7 @@ CONFIG_I2C_GPIO=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -593,7 +593,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/ateb9200_defconfig b/arch/arm/configs/ateb9200_defconfig index 85c80f723d8..a19e824cf7f 100644 --- a/arch/arm/configs/ateb9200_defconfig +++ b/arch/arm/configs/ateb9200_defconfig @@ -723,7 +723,7 @@ CONFIG_I2C_GPIO=m # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index b2bbf217c70..80222feb7da 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -750,7 +750,7 @@ CONFIG_I2C_ELEKTOR=m # Other I2C Chip support # # CONFIG_SENSORS_DS1337 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set diff --git a/arch/arm/configs/cam60_defconfig b/arch/arm/configs/cam60_defconfig index f945105d6cd..8448108347c 100644 --- a/arch/arm/configs/cam60_defconfig +++ b/arch/arm/configs/cam60_defconfig @@ -722,7 +722,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -749,7 +749,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/cm_x300_defconfig b/arch/arm/configs/cm_x300_defconfig index 46f1c9dc350..227da0843ea 100644 --- a/arch/arm/configs/cm_x300_defconfig +++ b/arch/arm/configs/cm_x300_defconfig @@ -763,8 +763,8 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/colibri_defconfig b/arch/arm/configs/colibri_defconfig index c3e3418ed4f..744086fff41 100644 --- a/arch/arm/configs/colibri_defconfig +++ b/arch/arm/configs/colibri_defconfig @@ -801,7 +801,7 @@ CONFIG_I2C_CHARDEV=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index 98765438048..d6cd165e931 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -982,8 +982,8 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -1008,7 +1008,7 @@ CONFIG_SPI_PXA2XX=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/arm/configs/csb337_defconfig b/arch/arm/configs/csb337_defconfig index 67e65e4f0cd..29f68c2effe 100644 --- a/arch/arm/configs/csb337_defconfig +++ b/arch/arm/configs/csb337_defconfig @@ -679,7 +679,7 @@ CONFIG_I2C_GPIO=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/csb637_defconfig b/arch/arm/configs/csb637_defconfig index 99702146c9f..f7b60ceed6c 100644 --- a/arch/arm/configs/csb637_defconfig +++ b/arch/arm/configs/csb637_defconfig @@ -704,7 +704,7 @@ CONFIG_I2C_CHARDEV=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/ecbat91_defconfig b/arch/arm/configs/ecbat91_defconfig index cfeb817ad21..ca520733bdb 100644 --- a/arch/arm/configs/ecbat91_defconfig +++ b/arch/arm/configs/ecbat91_defconfig @@ -721,7 +721,7 @@ CONFIG_I2C_GPIO=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -747,7 +747,7 @@ CONFIG_SPI_AT91=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 21aa013793c..3f89d5f25bc 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ b/arch/arm/configs/ep93xx_defconfig @@ -681,7 +681,7 @@ CONFIG_I2C_ALGOBIT=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig index 2a84d557adc..d5ee16e6abf 100644 --- a/arch/arm/configs/ezx_defconfig +++ b/arch/arm/configs/ezx_defconfig @@ -877,7 +877,7 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -900,7 +900,7 @@ CONFIG_SPI_PXA2XX=m # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_HAVE_GPIO_LIB=y diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig index 299dc22294a..6ace512fa10 100644 --- a/arch/arm/configs/footbridge_defconfig +++ b/arch/arm/configs/footbridge_defconfig @@ -801,7 +801,7 @@ CONFIG_I2C=m # # Other I2C Chip support # -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set diff --git a/arch/arm/configs/iop13xx_defconfig b/arch/arm/configs/iop13xx_defconfig index 482e5706105..89c17761726 100644 --- a/arch/arm/configs/iop13xx_defconfig +++ b/arch/arm/configs/iop13xx_defconfig @@ -744,7 +744,7 @@ CONFIG_I2C_IOP3XX=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index 8612f58e105..d70177b38f5 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -847,7 +847,7 @@ CONFIG_I2C_IOP3XX=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/iop33x_defconfig b/arch/arm/configs/iop33x_defconfig index 8b0098d19d0..eec48829826 100644 --- a/arch/arm/configs/iop33x_defconfig +++ b/arch/arm/configs/iop33x_defconfig @@ -746,7 +746,7 @@ CONFIG_I2C_IOP3XX=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/ixp2000_defconfig b/arch/arm/configs/ixp2000_defconfig index 84680db6c61..57526c15e85 100644 --- a/arch/arm/configs/ixp2000_defconfig +++ b/arch/arm/configs/ixp2000_defconfig @@ -768,7 +768,7 @@ CONFIG_I2C_IXP2000=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/ixp23xx_defconfig b/arch/arm/configs/ixp23xx_defconfig index 4a2f7b2372d..ef97561ed75 100644 --- a/arch/arm/configs/ixp23xx_defconfig +++ b/arch/arm/configs/ixp23xx_defconfig @@ -900,7 +900,7 @@ CONFIG_I2C_ALGOBIT=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index fc14932e3ab..95cd8dfb5f1 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -1083,7 +1083,7 @@ CONFIG_I2C_IXP4XX=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/kafa_defconfig b/arch/arm/configs/kafa_defconfig index 6dd95a2c8d5..9f92fc527f5 100644 --- a/arch/arm/configs/kafa_defconfig +++ b/arch/arm/configs/kafa_defconfig @@ -603,7 +603,7 @@ CONFIG_I2C_GPIO=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/kirkwood_defconfig b/arch/arm/configs/kirkwood_defconfig index ab8b1e0d0da..4bc38078d58 100644 --- a/arch/arm/configs/kirkwood_defconfig +++ b/arch/arm/configs/kirkwood_defconfig @@ -905,8 +905,8 @@ CONFIG_I2C_MV64XXX=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -930,7 +930,7 @@ CONFIG_SPI_ORION=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/loki_defconfig b/arch/arm/configs/loki_defconfig index 17da7c3b3d5..b720fcffbcd 100644 --- a/arch/arm/configs/loki_defconfig +++ b/arch/arm/configs/loki_defconfig @@ -654,7 +654,7 @@ CONFIG_I2C_MV64XXX=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -675,7 +675,7 @@ CONFIG_SPI_MASTER=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/magician_defconfig b/arch/arm/configs/magician_defconfig index 4d11678584d..73ba62b7106 100644 --- a/arch/arm/configs/magician_defconfig +++ b/arch/arm/configs/magician_defconfig @@ -678,7 +678,7 @@ CONFIG_I2C_PXA=m # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/msm_defconfig b/arch/arm/configs/msm_defconfig index 3b4ecf2a90d..cc3b06ee24f 100644 --- a/arch/arm/configs/msm_defconfig +++ b/arch/arm/configs/msm_defconfig @@ -580,7 +580,7 @@ CONFIG_I2C_MSM=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set CONFIG_SENSORS_PCA9633=y diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index d38ebf8721a..83c817f31bc 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -832,7 +832,7 @@ CONFIG_I2C_MV64XXX=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/n770_defconfig b/arch/arm/configs/n770_defconfig index 568ef1770d5..672f6db06a5 100644 --- a/arch/arm/configs/n770_defconfig +++ b/arch/arm/configs/n770_defconfig @@ -767,7 +767,7 @@ CONFIG_I2C_OMAP=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -798,7 +798,7 @@ CONFIG_SPI_OMAP_UWIRE=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_TSC2101 is not set # CONFIG_SPI_TSC2102 is not set # CONFIG_SPI_TSC210X is not set diff --git a/arch/arm/configs/neocore926_defconfig b/arch/arm/configs/neocore926_defconfig index 325f1e105f6..e0e4e98b5aa 100644 --- a/arch/arm/configs/neocore926_defconfig +++ b/arch/arm/configs/neocore926_defconfig @@ -774,8 +774,8 @@ CONFIG_I2C_CHARDEV=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -798,7 +798,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/neponset_defconfig b/arch/arm/configs/neponset_defconfig index 92ccdc6492f..d81ea219c93 100644 --- a/arch/arm/configs/neponset_defconfig +++ b/arch/arm/configs/neponset_defconfig @@ -737,7 +737,7 @@ CONFIG_I2C_ALGOBIT=y # # Other I2C Chip support # -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set diff --git a/arch/arm/configs/omap3_beagle_defconfig b/arch/arm/configs/omap3_beagle_defconfig index e042d27eae1..4c6fb7e959d 100644 --- a/arch/arm/configs/omap3_beagle_defconfig +++ b/arch/arm/configs/omap3_beagle_defconfig @@ -687,8 +687,8 @@ CONFIG_I2C_OMAP=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/arm/configs/omap3_pandora_defconfig b/arch/arm/configs/omap3_pandora_defconfig index 09543f4de5b..b54ad2e2da3 100644 --- a/arch/arm/configs/omap3_pandora_defconfig +++ b/arch/arm/configs/omap3_pandora_defconfig @@ -713,8 +713,8 @@ CONFIG_I2C_OMAP=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -740,7 +740,7 @@ CONFIG_SPI_OMAP24XX=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/arm/configs/omap_2430sdp_defconfig b/arch/arm/configs/omap_2430sdp_defconfig index b0617c0da2a..640e9afc463 100644 --- a/arch/arm/configs/omap_2430sdp_defconfig +++ b/arch/arm/configs/omap_2430sdp_defconfig @@ -710,7 +710,7 @@ CONFIG_I2C_OMAP=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -743,7 +743,7 @@ CONFIG_SPI_MASTER=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_TSC2101 is not set # CONFIG_SPI_TSC2102 is not set # CONFIG_SPI_TSC210X is not set diff --git a/arch/arm/configs/omap_apollon_2420_defconfig b/arch/arm/configs/omap_apollon_2420_defconfig index bb39dfc72d6..ac7adf34c54 100644 --- a/arch/arm/configs/omap_apollon_2420_defconfig +++ b/arch/arm/configs/omap_apollon_2420_defconfig @@ -612,7 +612,7 @@ CONFIG_SPI_OMAP24XX=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_TSC2101 is not set # CONFIG_SPI_TSC2102 is not set # CONFIG_SPI_TSC210X is not set diff --git a/arch/arm/configs/omap_generic_1510_defconfig b/arch/arm/configs/omap_generic_1510_defconfig index 4b1c252f209..ccdc661b585 100644 --- a/arch/arm/configs/omap_generic_1510_defconfig +++ b/arch/arm/configs/omap_generic_1510_defconfig @@ -637,7 +637,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/omap_generic_1610_defconfig b/arch/arm/configs/omap_generic_1610_defconfig index fc66f019d56..0c42c895504 100644 --- a/arch/arm/configs/omap_generic_1610_defconfig +++ b/arch/arm/configs/omap_generic_1610_defconfig @@ -641,7 +641,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/omap_h2_1610_defconfig b/arch/arm/configs/omap_h2_1610_defconfig index c03507202f3..74dbdc644d3 100644 --- a/arch/arm/configs/omap_h2_1610_defconfig +++ b/arch/arm/configs/omap_h2_1610_defconfig @@ -700,7 +700,7 @@ CONFIG_I2C_OMAP=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -731,7 +731,7 @@ CONFIG_SPI_OMAP_UWIRE=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_TSC2101=y # CONFIG_SPI_TSC2102 is not set # CONFIG_SPI_TSC210X is not set diff --git a/arch/arm/configs/omap_h4_2420_defconfig b/arch/arm/configs/omap_h4_2420_defconfig index 5bc89185a64..a4aab8e4c29 100644 --- a/arch/arm/configs/omap_h4_2420_defconfig +++ b/arch/arm/configs/omap_h4_2420_defconfig @@ -681,7 +681,7 @@ CONFIG_I2C_OMAP=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/omap_innovator_1510_defconfig b/arch/arm/configs/omap_innovator_1510_defconfig index 55b2611bd90..0cfe363e336 100644 --- a/arch/arm/configs/omap_innovator_1510_defconfig +++ b/arch/arm/configs/omap_innovator_1510_defconfig @@ -631,7 +631,7 @@ CONFIG_I2C_BOARDINFO=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/omap_ldp_defconfig b/arch/arm/configs/omap_ldp_defconfig index b77d054169e..aa9d34feddc 100644 --- a/arch/arm/configs/omap_ldp_defconfig +++ b/arch/arm/configs/omap_ldp_defconfig @@ -629,8 +629,8 @@ CONFIG_I2C_OMAP=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -656,7 +656,7 @@ CONFIG_SPI_OMAP24XX=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/arm/configs/omap_osk_5912_defconfig b/arch/arm/configs/omap_osk_5912_defconfig index b68e0144cab..6b3b5c610da 100644 --- a/arch/arm/configs/omap_osk_5912_defconfig +++ b/arch/arm/configs/omap_osk_5912_defconfig @@ -711,7 +711,7 @@ CONFIG_I2C_OMAP=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/onearm_defconfig b/arch/arm/configs/onearm_defconfig index 418ca2febbe..f8701fadb60 100644 --- a/arch/arm/configs/onearm_defconfig +++ b/arch/arm/configs/onearm_defconfig @@ -698,7 +698,7 @@ CONFIG_I2C_CHARDEV=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index b2456ca544c..a8ee6984a09 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -886,8 +886,8 @@ CONFIG_I2C_MV64XXX=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/arm/configs/overo_defconfig b/arch/arm/configs/overo_defconfig index 49200967a15..a57f9e4124f 100644 --- a/arch/arm/configs/overo_defconfig +++ b/arch/arm/configs/overo_defconfig @@ -858,8 +858,8 @@ CONFIG_I2C_OMAP=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -885,7 +885,7 @@ CONFIG_SPI_OMAP24XX=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/arm/configs/palmz71_defconfig b/arch/arm/configs/palmz71_defconfig index 6361922e71c..08e14068fff 100644 --- a/arch/arm/configs/palmz71_defconfig +++ b/arch/arm/configs/palmz71_defconfig @@ -554,7 +554,7 @@ CONFIG_SPI_OMAP_UWIRE=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_TSC2101 is not set # CONFIG_SPI_TSC2102 is not set # CONFIG_SPI_TSC210X is not set diff --git a/arch/arm/configs/palmz72_defconfig b/arch/arm/configs/palmz72_defconfig index 3245f8f33e0..a0dc37c05de 100644 --- a/arch/arm/configs/palmz72_defconfig +++ b/arch/arm/configs/palmz72_defconfig @@ -527,8 +527,8 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -552,7 +552,7 @@ CONFIG_SPI_MASTER=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=y # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/arm/configs/pcm027_defconfig b/arch/arm/configs/pcm027_defconfig index 17b9b246957..05ad96a43b1 100644 --- a/arch/arm/configs/pcm027_defconfig +++ b/arch/arm/configs/pcm027_defconfig @@ -606,7 +606,7 @@ CONFIG_I2C_PXA=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/pcm038_defconfig b/arch/arm/configs/pcm038_defconfig index 6b798c215ca..41429a00f58 100644 --- a/arch/arm/configs/pcm038_defconfig +++ b/arch/arm/configs/pcm038_defconfig @@ -604,7 +604,7 @@ CONFIG_I2C_BOARDINFO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -626,7 +626,7 @@ CONFIG_SPI_BITBANG=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_HAVE_GPIO_LIB=y diff --git a/arch/arm/configs/picotux200_defconfig b/arch/arm/configs/picotux200_defconfig index 59e4463c2da..9018f0f298a 100644 --- a/arch/arm/configs/picotux200_defconfig +++ b/arch/arm/configs/picotux200_defconfig @@ -744,7 +744,7 @@ CONFIG_I2C_GPIO=m # CONFIG_SENSORS_DS1337=m CONFIG_SENSORS_DS1374=m -CONFIG_SENSORS_EEPROM=m +CONFIG_EEPROM_LEGACY=m CONFIG_SENSORS_PCF8574=m CONFIG_SENSORS_PCA9539=m CONFIG_SENSORS_PCF8591=m diff --git a/arch/arm/configs/pnx4008_defconfig b/arch/arm/configs/pnx4008_defconfig index 811b8f60d19..67b5f1e15f4 100644 --- a/arch/arm/configs/pnx4008_defconfig +++ b/arch/arm/configs/pnx4008_defconfig @@ -915,7 +915,7 @@ CONFIG_I2C_ALGOPCA=m # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -CONFIG_SENSORS_EEPROM=m +CONFIG_EEPROM_LEGACY=m CONFIG_SENSORS_PCF8574=m # CONFIG_SENSORS_PCA9539 is not set CONFIG_SENSORS_PCF8591=m diff --git a/arch/arm/configs/qil-a9260_defconfig b/arch/arm/configs/qil-a9260_defconfig index 5cbd8158964..cd1d717903a 100644 --- a/arch/arm/configs/qil-a9260_defconfig +++ b/arch/arm/configs/qil-a9260_defconfig @@ -687,7 +687,7 @@ CONFIG_I2C_CHARDEV=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -714,7 +714,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index f62d1817d2c..a29d61fe4c6 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -590,7 +590,7 @@ CONFIG_I2C_ACORN=y # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index 35faaea8623..65a583ee5df 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -923,7 +923,7 @@ CONFIG_I2C_SIMTEC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_SENSORS_EEPROM=m +CONFIG_EEPROM_LEGACY=m # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -950,7 +950,7 @@ CONFIG_SPI_S3C24XX_GPIO=m # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_HAVE_GPIO_LIB=y diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig index cf3c1b5d704..2e8fa50e9a0 100644 --- a/arch/arm/configs/s3c6400_defconfig +++ b/arch/arm/configs/s3c6400_defconfig @@ -465,8 +465,8 @@ CONFIG_I2C_S3C2410=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=y -# CONFIG_SENSORS_EEPROM is not set +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index 4df5b4db2aa..745c68ffb88 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -977,8 +977,8 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -1003,7 +1003,7 @@ CONFIG_SPI_PXA2XX=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/arm/configs/sx1_defconfig b/arch/arm/configs/sx1_defconfig index 853dcdd9f2e..25b007ff8ba 100644 --- a/arch/arm/configs/sx1_defconfig +++ b/arch/arm/configs/sx1_defconfig @@ -610,7 +610,7 @@ CONFIG_I2C_OMAP=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig index 9033d147f05..b6f83819781 100644 --- a/arch/arm/configs/trizeps4_defconfig +++ b/arch/arm/configs/trizeps4_defconfig @@ -948,7 +948,7 @@ CONFIG_I2C_PXA_SLAVE=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/usb-a9260_defconfig b/arch/arm/configs/usb-a9260_defconfig index fcb4aaabd43..fd7774033d6 100644 --- a/arch/arm/configs/usb-a9260_defconfig +++ b/arch/arm/configs/usb-a9260_defconfig @@ -676,7 +676,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/usb-a9263_defconfig b/arch/arm/configs/usb-a9263_defconfig index b786e0407e8..e7c19dd9255 100644 --- a/arch/arm/configs/usb-a9263_defconfig +++ b/arch/arm/configs/usb-a9263_defconfig @@ -668,7 +668,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index 8355f88f729..b11c5da3996 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -611,7 +611,7 @@ CONFIG_I2C_ALGOBIT=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -CONFIG_SENSORS_EEPROM=m +CONFIG_EEPROM_LEGACY=m # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig index d01fecb8673..30f463d2fa8 100644 --- a/arch/arm/configs/viper_defconfig +++ b/arch/arm/configs/viper_defconfig @@ -860,8 +860,8 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/arm/configs/xm_x2xx_defconfig b/arch/arm/configs/xm_x2xx_defconfig index f891364dece..1039f366bf8 100644 --- a/arch/arm/configs/xm_x2xx_defconfig +++ b/arch/arm/configs/xm_x2xx_defconfig @@ -1009,8 +1009,8 @@ CONFIG_I2C_PXA=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/arm/configs/yl9200_defconfig b/arch/arm/configs/yl9200_defconfig index a9f41c24c9d..9192e597767 100644 --- a/arch/arm/configs/yl9200_defconfig +++ b/arch/arm/configs/yl9200_defconfig @@ -682,7 +682,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set # CONFIG_POWER_SUPPLY is not set diff --git a/arch/avr32/configs/atngw100_defconfig b/arch/avr32/configs/atngw100_defconfig index 164e2814ae7..574aca97533 100644 --- a/arch/avr32/configs/atngw100_defconfig +++ b/arch/avr32/configs/atngw100_defconfig @@ -644,8 +644,8 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=m -# CONFIG_SENSORS_EEPROM is not set +CONFIG_EEPROM_AT24=m +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -670,7 +670,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/atngw100_evklcd100_defconfig b/arch/avr32/configs/atngw100_evklcd100_defconfig index b0572d21382..86a45b5c9d0 100644 --- a/arch/avr32/configs/atngw100_evklcd100_defconfig +++ b/arch/avr32/configs/atngw100_evklcd100_defconfig @@ -671,7 +671,7 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -699,7 +699,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_HAVE_GPIO_LIB=y diff --git a/arch/avr32/configs/atngw100_evklcd101_defconfig b/arch/avr32/configs/atngw100_evklcd101_defconfig index c5b898d2107..a96b68ea5e8 100644 --- a/arch/avr32/configs/atngw100_evklcd101_defconfig +++ b/arch/avr32/configs/atngw100_evklcd101_defconfig @@ -671,7 +671,7 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -699,7 +699,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_HAVE_GPIO_LIB=y diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig index c9dc64832a1..0abe90adb1a 100644 --- a/arch/avr32/configs/atstk1002_defconfig +++ b/arch/avr32/configs/atstk1002_defconfig @@ -663,8 +663,8 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=m -# CONFIG_SENSORS_EEPROM is not set +CONFIG_EEPROM_AT24=m +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -689,7 +689,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/atstk1003_defconfig b/arch/avr32/configs/atstk1003_defconfig index 29ea1327b49..101972f40f0 100644 --- a/arch/avr32/configs/atstk1003_defconfig +++ b/arch/avr32/configs/atstk1003_defconfig @@ -611,8 +611,8 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=m -# CONFIG_SENSORS_EEPROM is not set +CONFIG_EEPROM_AT24=m +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -637,7 +637,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/atstk1004_defconfig b/arch/avr32/configs/atstk1004_defconfig index 69e6c0d08ce..518f7898df7 100644 --- a/arch/avr32/configs/atstk1004_defconfig +++ b/arch/avr32/configs/atstk1004_defconfig @@ -394,7 +394,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/atstk1006_defconfig b/arch/avr32/configs/atstk1006_defconfig index 361c31c2af1..c1603c4860e 100644 --- a/arch/avr32/configs/atstk1006_defconfig +++ b/arch/avr32/configs/atstk1006_defconfig @@ -684,8 +684,8 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -710,7 +710,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/favr-32_defconfig b/arch/avr32/configs/favr-32_defconfig index e2bd9982e2a..4c3ac0643e4 100644 --- a/arch/avr32/configs/favr-32_defconfig +++ b/arch/avr32/configs/favr-32_defconfig @@ -633,8 +633,8 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -659,7 +659,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/hammerhead_defconfig b/arch/avr32/configs/hammerhead_defconfig index 0d3d2982c8f..3970fcc4d76 100644 --- a/arch/avr32/configs/hammerhead_defconfig +++ b/arch/avr32/configs/hammerhead_defconfig @@ -716,8 +716,8 @@ CONFIG_I2C_GPIO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -742,7 +742,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/avr32/configs/mimc200_defconfig b/arch/avr32/configs/mimc200_defconfig index 981e4f8b8ae..1a58ffbc752 100644 --- a/arch/avr32/configs/mimc200_defconfig +++ b/arch/avr32/configs/mimc200_defconfig @@ -565,8 +565,8 @@ CONFIG_I2C_GPIO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=y -# CONFIG_SENSORS_EEPROM is not set +CONFIG_EEPROM_AT24=y +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -592,7 +592,7 @@ CONFIG_SPI_ATMEL=y # # SPI Protocol Masters # -CONFIG_SPI_AT25=y +CONFIG_EEPROM_AT25=y # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_REQUIRE_GPIOLIB=y diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig index e0b3f242b55..defb9785c65 100644 --- a/arch/blackfin/configs/BF518F-EZBRD_defconfig +++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig @@ -746,9 +746,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig index 69f66c35b2a..992424ff315 100644 --- a/arch/blackfin/configs/BF526-EZBRD_defconfig +++ b/arch/blackfin/configs/BF526-EZBRD_defconfig @@ -793,9 +793,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -820,7 +820,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig index f92668af00b..21e3c1af55b 100644 --- a/arch/blackfin/configs/BF527-EZKIT_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT_defconfig @@ -837,9 +837,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -864,7 +864,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig index 92afd988449..0bdf20a1af6 100644 --- a/arch/blackfin/configs/BF533-EZKIT_defconfig +++ b/arch/blackfin/configs/BF533-EZKIT_defconfig @@ -719,7 +719,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig index 49eabb41e9e..2f747d6e97e 100644 --- a/arch/blackfin/configs/BF533-STAMP_defconfig +++ b/arch/blackfin/configs/BF533-STAMP_defconfig @@ -743,9 +743,9 @@ CONFIG_I2C_HELPER_AUTO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -770,7 +770,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig index 332142f7f9b..8b0a81294e6 100644 --- a/arch/blackfin/configs/BF537-STAMP_defconfig +++ b/arch/blackfin/configs/BF537-STAMP_defconfig @@ -794,9 +794,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set CONFIG_SENSORS_AD5252=m -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -821,7 +821,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig index ed15934c67c..a1f766bf7d9 100644 --- a/arch/blackfin/configs/BF538-EZKIT_defconfig +++ b/arch/blackfin/configs/BF538-EZKIT_defconfig @@ -796,9 +796,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -823,7 +823,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig index d4ed9ce1f62..cd2da6b7692 100644 --- a/arch/blackfin/configs/BF548-EZKIT_defconfig +++ b/arch/blackfin/configs/BF548-EZKIT_defconfig @@ -928,9 +928,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -955,7 +955,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig index 1ecb7a38c90..b398ca202db 100644 --- a/arch/blackfin/configs/BF561-EZKIT_defconfig +++ b/arch/blackfin/configs/BF561-EZKIT_defconfig @@ -756,7 +756,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig index 9683b2e1309..3a20e281d23 100644 --- a/arch/blackfin/configs/BlackStamp_defconfig +++ b/arch/blackfin/configs/BlackStamp_defconfig @@ -691,7 +691,7 @@ CONFIG_I2C_GPIO=m # # CONFIG_DS1682 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set @@ -713,7 +713,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -CONFIG_SPI_AT25=y +CONFIG_EEPROM_AT25=y CONFIG_SPI_SPIDEV=m # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig index a041e7eba77..95146948166 100644 --- a/arch/blackfin/configs/CM-BF527_defconfig +++ b/arch/blackfin/configs/CM-BF527_defconfig @@ -715,7 +715,7 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -742,7 +742,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig index efd68bc78f3..efe9741b1f1 100644 --- a/arch/blackfin/configs/CM-BF548_defconfig +++ b/arch/blackfin/configs/CM-BF548_defconfig @@ -798,7 +798,7 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8575 is not set # CONFIG_SENSORS_PCA9543 is not set @@ -826,7 +826,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig index 5d3901d23fd..bd553da15db 100644 --- a/arch/blackfin/configs/H8606_defconfig +++ b/arch/blackfin/configs/H8606_defconfig @@ -750,7 +750,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -CONFIG_SPI_AT25=y +CONFIG_EEPROM_AT25=y CONFIG_SPI_SPIDEV=y # diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig index e66f5daaa82..eae83b5de92 100644 --- a/arch/blackfin/configs/IP0X_defconfig +++ b/arch/blackfin/configs/IP0X_defconfig @@ -803,7 +803,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig index ce5dde9de9d..ad096702ac1 100644 --- a/arch/blackfin/configs/PNAV-10_defconfig +++ b/arch/blackfin/configs/PNAV-10_defconfig @@ -755,9 +755,9 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set +# CONFIG_EEPROM_AT24 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set CONFIG_SENSORS_PCF8574=m # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -781,7 +781,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig index 7c8250d6fa6..fa580affc9d 100644 --- a/arch/blackfin/configs/SRV1_defconfig +++ b/arch/blackfin/configs/SRV1_defconfig @@ -798,7 +798,7 @@ CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_SENSORS_AD5252 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCF8575 is not set # CONFIG_SENSORS_PCA9543 is not set @@ -826,7 +826,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -CONFIG_SPI_AT25=m +CONFIG_EEPROM_AT25=m # CONFIG_SPI_SPIDEV is not set # diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig index 9af522c7dad..97a1f1d20dc 100644 --- a/arch/blackfin/configs/TCM-BF537_defconfig +++ b/arch/blackfin/configs/TCM-BF537_defconfig @@ -533,7 +533,7 @@ CONFIG_SPI_BFIN=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig index 6dd8655664f..ace41096b47 100644 --- a/arch/ia64/configs/bigsur_defconfig +++ b/arch/ia64/configs/bigsur_defconfig @@ -752,7 +752,7 @@ CONFIG_I2C_ALGOBIT=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index 0a06b1333c9..514f0635daf 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -841,7 +841,7 @@ CONFIG_I2C_ALGOPCF=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/m32r/configs/m32104ut_defconfig b/arch/m32r/configs/m32104ut_defconfig index 9b5af6cd2e0..6f54b00b383 100644 --- a/arch/m32r/configs/m32104ut_defconfig +++ b/arch/m32r/configs/m32104ut_defconfig @@ -621,7 +621,7 @@ CONFIG_I2C_ELEKTOR=m # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index a3bbbf067a3..783da855a2e 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -959,7 +959,7 @@ CONFIG_I2C_SIBYTE=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y CONFIG_SENSORS_PCF8574=y # CONFIG_PCF8575 is not set CONFIG_SENSORS_PCF8591=y diff --git a/arch/mips/configs/emma2rh_defconfig b/arch/mips/configs/emma2rh_defconfig index 634bb4eaf13..fea9bc9865a 100644 --- a/arch/mips/configs/emma2rh_defconfig +++ b/arch/mips/configs/emma2rh_defconfig @@ -996,7 +996,7 @@ CONFIG_I2C_CHARDEV=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/mips/configs/fulong_defconfig b/arch/mips/configs/fulong_defconfig index b6698a232ae..786a9bc9a69 100644 --- a/arch/mips/configs/fulong_defconfig +++ b/arch/mips/configs/fulong_defconfig @@ -1006,8 +1006,8 @@ CONFIG_I2C_VIAPRO=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/mips/configs/msp71xx_defconfig b/arch/mips/configs/msp71xx_defconfig index dd13db4d0fb..84d6491b3d4 100644 --- a/arch/mips/configs/msp71xx_defconfig +++ b/arch/mips/configs/msp71xx_defconfig @@ -929,7 +929,7 @@ CONFIG_I2C_PMCMSP=y # # CONFIG_SENSORS_DS1337 is not set # CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set CONFIG_PMCTWILED=y # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index db9272677aa..8426d3b9501 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -1845,7 +1845,7 @@ CONFIG_I2C_VOODOO3=m CONFIG_SENSORS_DS1337=m CONFIG_SENSORS_DS1374=m # CONFIG_DS1682 is not set -CONFIG_SENSORS_EEPROM=m +CONFIG_EEPROM_LEGACY=m CONFIG_SENSORS_PCF8574=m CONFIG_SENSORS_PCA9539=m CONFIG_SENSORS_PCF8591=m @@ -1872,7 +1872,7 @@ CONFIG_SPI_BUTTERFLY=m # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_W1=m diff --git a/arch/mips/configs/pnx8335-stb225_defconfig b/arch/mips/configs/pnx8335-stb225_defconfig index d9536522cff..2728caa6c2f 100644 --- a/arch/mips/configs/pnx8335-stb225_defconfig +++ b/arch/mips/configs/pnx8335-stb225_defconfig @@ -640,8 +640,8 @@ CONFIG_I2C_CHARDEV=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig index c7c0864b8ce..83d5c58662c 100644 --- a/arch/mips/configs/rbtx49xx_defconfig +++ b/arch/mips/configs/rbtx49xx_defconfig @@ -522,7 +522,7 @@ CONFIG_SPI_TXX9=y # # SPI Protocol Masters # -CONFIG_SPI_AT25=y +CONFIG_EEPROM_AT25=y # CONFIG_SPI_TLE62X0 is not set CONFIG_HAVE_GPIO_LIB=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 15f48e03ec2..ce28cd6f0d4 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -730,8 +730,8 @@ CONFIG_I2C_IBM_IIC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 59cbd2761ed..031ac6d4212 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -660,8 +660,8 @@ CONFIG_I2C_HELPER_AUTO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig index 3df627494b6..99c495ad9c7 100644 --- a/arch/powerpc/configs/52xx/cm5200_defconfig +++ b/arch/powerpc/configs/52xx/cm5200_defconfig @@ -629,8 +629,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 5b969f9c925..9796220032f 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -762,8 +762,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index 3c0d4e56172..d081e003105 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -629,8 +629,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 9d0207783d6..b21b8e8c3a7 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -740,8 +740,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=m +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=m # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index bc190051e8d..79954579f5e 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -645,8 +645,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig index cbecaf3d790..b3d9d1a8dfa 100644 --- a/arch/powerpc/configs/83xx/asp8347_defconfig +++ b/arch/powerpc/configs/83xx/asp8347_defconfig @@ -732,8 +732,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index bfc32ea265a..6293bb99b52 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -858,8 +858,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -884,7 +884,7 @@ CONFIG_SPI_MPC83xx=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index aad0e1a98c5..c7cceb4536d 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -919,8 +919,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -945,7 +945,7 @@ CONFIG_SPI_MPC83xx=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig index 9cb8c8b956e..5e3cd49a2e9 100644 --- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig @@ -730,8 +730,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index 9cc976f010c..90c48260688 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -745,8 +745,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -770,7 +770,7 @@ CONFIG_SPI_MPC83xx=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index 07a674f5344..42b7f3403fb 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -898,8 +898,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set CONFIG_SENSORS_PCF8574=y # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -923,7 +923,7 @@ CONFIG_SPI_MPC83xx=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index 426232cb009..17c903b3657 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -778,8 +778,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set CONFIG_SENSORS_PCF8574=y # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -803,7 +803,7 @@ CONFIG_SPI_MPC83xx=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig index 36e2e93a1c5..728fc95d587 100644 --- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig @@ -673,8 +673,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig index 80eb6c9a05c..f7546e7cb76 100644 --- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig @@ -805,8 +805,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig index b9b236806e9..b94972f5224 100644 --- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig @@ -723,8 +723,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -749,7 +749,7 @@ CONFIG_SPI_MPC83xx=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set CONFIG_SPI_SPIDEV=y # CONFIG_SPI_TLE62X0 is not set CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig index f6350d7e168..ad97649bdc7 100644 --- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig @@ -787,8 +787,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index f447de16f75..2e526beeeb9 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -789,8 +789,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig index 8d2d7eeab5f..54bb24a5e02 100644 --- a/arch/powerpc/configs/83xx/sbc834x_defconfig +++ b/arch/powerpc/configs/83xx/sbc834x_defconfig @@ -640,8 +640,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/mpc8536_ds_defconfig b/arch/powerpc/configs/85xx/mpc8536_ds_defconfig index e243e14a670..256055cb6d2 100644 --- a/arch/powerpc/configs/85xx/mpc8536_ds_defconfig +++ b/arch/powerpc/configs/85xx/mpc8536_ds_defconfig @@ -849,8 +849,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/mpc8544_ds_defconfig b/arch/powerpc/configs/85xx/mpc8544_ds_defconfig index f6cb01495ea..5ac7e51e9f1 100644 --- a/arch/powerpc/configs/85xx/mpc8544_ds_defconfig +++ b/arch/powerpc/configs/85xx/mpc8544_ds_defconfig @@ -848,8 +848,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/mpc8568mds_defconfig b/arch/powerpc/configs/85xx/mpc8568mds_defconfig index 597be849181..ab5b0b58924 100644 --- a/arch/powerpc/configs/85xx/mpc8568mds_defconfig +++ b/arch/powerpc/configs/85xx/mpc8568mds_defconfig @@ -723,8 +723,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/mpc8572_ds_defconfig b/arch/powerpc/configs/85xx/mpc8572_ds_defconfig index 32aeb79216f..65ef823e08c 100644 --- a/arch/powerpc/configs/85xx/mpc8572_ds_defconfig +++ b/arch/powerpc/configs/85xx/mpc8572_ds_defconfig @@ -861,8 +861,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index 5a0cf58d2b8..f1288a0c304 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -877,8 +877,8 @@ CONFIG_I2C_HELPER_AUTO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig index f3e4f3481fd..b3b1de6a6a8 100644 --- a/arch/powerpc/configs/85xx/tqm8540_defconfig +++ b/arch/powerpc/configs/85xx/tqm8540_defconfig @@ -787,8 +787,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig index c6248939453..a92a639cdf8 100644 --- a/arch/powerpc/configs/85xx/tqm8541_defconfig +++ b/arch/powerpc/configs/85xx/tqm8541_defconfig @@ -796,8 +796,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index eef45b97dc3..397d37fbe7e 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -806,8 +806,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig index 11b637e99a5..856ca6ab37b 100644 --- a/arch/powerpc/configs/85xx/tqm8555_defconfig +++ b/arch/powerpc/configs/85xx/tqm8555_defconfig @@ -796,8 +796,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig index 2519169b6d4..56ed4213609 100644 --- a/arch/powerpc/configs/85xx/tqm8560_defconfig +++ b/arch/powerpc/configs/85xx/tqm8560_defconfig @@ -796,8 +796,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig index 391874c7b43..840b09a0728 100644 --- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig @@ -1081,8 +1081,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # CONFIG_DS1682=y -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig index 72854a10dfa..25bf4e95acb 100644 --- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig +++ b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig @@ -905,8 +905,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig index 41220ece603..8e0f0e5180a 100644 --- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig +++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig @@ -851,8 +851,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/86xx/sbc8641d_defconfig b/arch/powerpc/configs/86xx/sbc8641d_defconfig index a4342862f6e..37d9f4ed80f 100644 --- a/arch/powerpc/configs/86xx/sbc8641d_defconfig +++ b/arch/powerpc/configs/86xx/sbc8641d_defconfig @@ -958,8 +958,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 5078594cd1f..74c6feabdd7 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -1124,8 +1124,8 @@ CONFIG_I2C_MV64XXX=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=m +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=m CONFIG_SENSORS_PCF8574=m # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index c420e47426f..c6d2baa7aae 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -1012,7 +1012,7 @@ CONFIG_I2C_ALGOBIT=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index 9ba3c6fc2fe..d2123779512 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -834,7 +834,7 @@ CONFIG_I2C_ALGOBIT=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index 63b3c2372ce..5094a65a449 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -941,8 +941,8 @@ CONFIG_I2C_ALGOBIT=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index f85e71ccb98..fc905924c02 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -994,8 +994,8 @@ CONFIG_I2C_POWERMAC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index 54fa6248137..3a0ffd73b65 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -1015,8 +1015,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=m +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=m # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 045f1b008ce..8b244003b9e 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -866,8 +866,8 @@ CONFIG_I2C_AMD8111=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 15c5604d0b2..5ff3de205d6 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -844,8 +844,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index d582014b0a3..3e1272cfdd9 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -902,8 +902,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index c87b53abc61..55e5ebd91cd 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -860,8 +860,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index 1736bbc281e..21e0ccbd3f5 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -852,8 +852,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -CONFIG_SENSORS_EEPROM=y +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 199e5f59d7a..4f8681cc8d7 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -984,7 +984,7 @@ CONFIG_I2C_PASEMI=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_SENSORS_EEPROM=y +CONFIG_EEPROM_LEGACY=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index de9b121820a..5339bb44cce 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -1232,8 +1232,8 @@ CONFIG_I2C_POWERMAC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 4256e2c4534..2d72ee7a8d6 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -745,8 +745,8 @@ CONFIG_I2C_IBM_IIC=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 034a1fbdc88..358b85f9270 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -794,8 +794,8 @@ CONFIG_I2C_IBM_IIC=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index d4685d1c0be..88c6295b76c 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -1261,8 +1261,8 @@ CONFIG_I2C_PASEMI=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 01f05ec5abf..7d044dfd923 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1801,8 +1801,8 @@ CONFIG_I2C_STUB=m # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -CONFIG_AT24=m -CONFIG_SENSORS_EEPROM=m +CONFIG_EEPROM_AT24=m +CONFIG_EEPROM_LEGACY=m CONFIG_SENSORS_PCF8574=m CONFIG_PCF8575=m CONFIG_SENSORS_PCA9539=m diff --git a/arch/powerpc/configs/prpmc2800_defconfig b/arch/powerpc/configs/prpmc2800_defconfig index 6046dc0cbd8..a257da60808 100644 --- a/arch/powerpc/configs/prpmc2800_defconfig +++ b/arch/powerpc/configs/prpmc2800_defconfig @@ -943,8 +943,8 @@ CONFIG_I2C_MV64XXX=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1e520ab6511..61b10084971 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1055,8 +1055,8 @@ CONFIG_I2C_ALGOBIT=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index b3f5671972a..7eb16ab7b71 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -795,8 +795,8 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig index e21c0e8e22d..92895013813 100644 --- a/arch/sh/configs/cayman_defconfig +++ b/arch/sh/configs/cayman_defconfig @@ -700,8 +700,8 @@ CONFIG_I2C_HELPER_AUTO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index 158006847ad..e93d3962165 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -632,8 +632,8 @@ CONFIG_I2C_SH7760=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 30cac42f25e..8958629d7f3 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -684,8 +684,8 @@ CONFIG_I2C_SH_MOBILE=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 2e65149e950..60cb1b8a874 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -836,8 +836,8 @@ CONFIG_I2C_HIGHLANDER=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 043a8a509e0..de9ff781a63 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -838,8 +838,8 @@ CONFIG_I2C_HIGHLANDER=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index 7d2a9e88838..5d7d7ce8a80 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -777,7 +777,7 @@ CONFIG_SPI_SH_SCI=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index f680d3eecdf..801abeb1419 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -777,7 +777,7 @@ CONFIG_SPI_SH_SCI=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index be246f38150..bb87dfbda84 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -661,8 +661,8 @@ CONFIG_I2C_SH_MOBILE=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index 07e33c285b9..45079dabf31 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -823,8 +823,8 @@ CONFIG_I2C_PCA_PLATFORM=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index ae5cbe237ff..a687b9f02ed 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -653,8 +653,8 @@ CONFIG_I2C_HELPER_AUTO=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -678,7 +678,7 @@ CONFIG_SPI_MASTER=y # # SPI Protocol Masters # -# CONFIG_SPI_AT25 is not set +# CONFIG_EEPROM_AT25 is not set # CONFIG_SPI_SPIDEV is not set # CONFIG_SPI_TLE62X0 is not set # CONFIG_W1 is not set diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index cde19ae78f5..ade49941def 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -867,8 +867,8 @@ CONFIG_I2C_ALGOBIT=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index b30a08ed8eb..edba00d98ac 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1331,8 +1331,8 @@ CONFIG_I2C_I801=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 0e7dbc0a3e4..322dd2748fc 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1311,8 +1311,8 @@ CONFIG_I2C_I801=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_AT24 is not set -# CONFIG_SENSORS_EEPROM is not set +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig index 62aae334ee6..c76df8cda5e 100644 --- a/drivers/misc/eeprom/Kconfig +++ b/drivers/misc/eeprom/Kconfig @@ -1,6 +1,6 @@ menu "EEPROM support" -config AT24 +config EEPROM_AT24 tristate "I2C EEPROMs from most vendors" depends on I2C && SYSFS && EXPERIMENTAL help @@ -26,7 +26,7 @@ config AT24 This driver can also be built as a module. If so, the module will be called at24. -config SPI_AT25 +config EEPROM_AT25 tristate "SPI EEPROMs from most vendors" depends on SPI && SYSFS help @@ -37,9 +37,9 @@ config SPI_AT25 This driver can also be built as a module. If so, the module will be called at25. -config SENSORS_EEPROM +config EEPROM_LEGACY tristate "Old I2C EEPROM reader" - depends on I2C && EXPERIMENTAL + depends on I2C && SYSFS help If you say yes here you get read-only access to the EEPROM data available on modern memory DIMMs and Sony Vaio laptops via I2C. Such diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile index 3b7af6df79a..539dd8f8812 100644 --- a/drivers/misc/eeprom/Makefile +++ b/drivers/misc/eeprom/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_AT24) += at24.o -obj-$(CONFIG_SPI_AT25) += at25.o -obj-$(CONFIG_SENSORS_EEPROM) += eeprom.o +obj-$(CONFIG_EEPROM_AT24) += at24.o +obj-$(CONFIG_EEPROM_AT25) += at25.o +obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o -- cgit v1.2.3-70-g09d2 From 57064d213d2e44654d4f13c66df135b5e7389a26 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Fri, 23 Jan 2009 12:43:38 -0800 Subject: PCI: irq and pci_ids patch for Intel Tigerpoint DeviceIDs This patch adds the Intel Tigerpoint LPC Controller DeviceIDs. Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 1 + include/linux/pci_ids.h | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 4064345cf14..fecbce6e7d7 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -572,6 +572,7 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH7_1: case PCI_DEVICE_ID_INTEL_ICH7_30: case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_TGP_LPC: case PCI_DEVICE_ID_INTEL_ESB2_0: case PCI_DEVICE_ID_INTEL_ICH8_0: case PCI_DEVICE_ID_INTEL_ICH8_1: diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d56ad9c21c0..6f260b50253 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2421,6 +2421,7 @@ #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 #define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 +#define PCI_DEVICE_ID_INTEL_TGP_LPC 0x27bc #define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd #define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da #define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd -- cgit v1.2.3-70-g09d2 From bf3647c44bc76c43c4b2ebb4c37a559e899ac70e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 11:45:35 +0100 Subject: x86: tone down mtrr_trim_uncached_memory() warning kerneloops.org is reporting a lot of these warnings that come due to vmware not setting up any MTRRs for emulated CPUs: | Reported 709 times (14696 total reports) | BIOS bug (often in VMWare) where the MTRR's are set up incorrectly | or not at all | | This warning was last seen in version 2.6.29-rc2-git1, and first | seen in 2.6.24. | | More info: | http://www.kerneloops.org/searchweek.php?search=mtrr_trim_uncached_memory Keep a one-liner KERN_INFO about it - so that we have so notice if empty MTRRs are caused by native hardware/BIOS weirdness. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index d259e5d2e05..236a401b825 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1594,8 +1594,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all */ if (!highest_pfn) { - WARN(!kvm_para_available(), KERN_WARNING - "WARNING: strange, CPU MTRRs all blank?\n"); + printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); return 0; } -- cgit v1.2.3-70-g09d2 From 72410af921cbc9018da388ca1ddf75880a033ac1 Mon Sep 17 00:00:00 2001 From: Atsushi SAKAI Date: Fri, 16 Jan 2009 20:39:14 +0900 Subject: lguest: typos fix 3 points lguest_asm.S => i386_head.S LHCALL_BREAK => LHREQ_BREAK perferred => preferred Signed-off-by: Atsushi SAKAI Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 4 ++-- drivers/lguest/core.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a7ed208f81e..92f1c6f3e19 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -931,7 +931,7 @@ static void lguest_restart(char *reason) * that we can fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, - * and these are in lguest_asm.S. */ + * and these are in i386_head.S. */ /*G:060 We construct a table from the assembler templates: */ static const struct lguest_insns @@ -1093,7 +1093,7 @@ __init void lguest_init(void) acpi_ht = 0; #endif - /* We set the perferred console to "hvc". This is the "hypervisor + /* We set the preferred console to "hvc". This is the "hypervisor * virtual console" driver written by the PowerPC people, which we also * adapted for lguest's use. */ add_preferred_console("hvc", 0, NULL); diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 90663e01a56..60156dfdc60 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -224,7 +224,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) break; /* If the Guest asked to be stopped, we sleep. The Guest's - * clock timer or LHCALL_BREAK from the Waker will wake us. */ + * clock timer or LHREQ_BREAK from the Waker will wake us. */ if (cpu->halted) { set_current_state(TASK_INTERRUPTIBLE); schedule(); -- cgit v1.2.3-70-g09d2 From 5872fb94f85d2e4fdef94657bd14e1a492df9825 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 29 Jan 2009 16:28:02 -0800 Subject: Documentation: move DMA-mapping.txt to Doc/PCI/ Move DMA-mapping.txt to Documentation/PCI/. DMA-mapping.txt was supposed to be moved from Documentation/ to Documentation/PCI/. The 00-INDEX files in those two directories were updated, along with a few other text files, but the file itself somehow escaped being moved, so move it and update more text files and source files with its new location. Signed-off-by: Randy Dunlap Acked-by: Greg Kroah-Hartman cc: Jesse Barnes Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 2 +- Documentation/IO-mapping.txt | 4 ++-- Documentation/block/biodoc.txt | 5 +++-- Documentation/usb/dma.txt | 11 ++++++----- arch/ia64/hp/common/sba_iommu.c | 12 ++++++------ arch/parisc/include/asm/dma-mapping.h | 2 +- arch/parisc/kernel/pci-dma.c | 2 +- arch/x86/include/asm/dma-mapping.h | 4 ++-- arch/x86/kernel/pci-gart_64.c | 2 +- drivers/parisc/sba_iommu.c | 18 +++++++++--------- drivers/staging/altpciechdma/altpciechdma.c | 4 ++-- include/media/videobuf-dma-sg.h | 2 +- 12 files changed, 35 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 52441694fe0..2a3fcc55e98 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -5,7 +5,7 @@ This document describes the DMA API. For a more gentle introduction phrased in terms of the pci_ equivalents (and actual examples) see -DMA-mapping.txt +Documentation/PCI/PCI-DMA-mapping.txt. This API is split into two pieces. Part I describes the API and the corresponding pci_ API. Part II describes the extensions to the API diff --git a/Documentation/IO-mapping.txt b/Documentation/IO-mapping.txt index 86edb61bdee..78a440695e1 100644 --- a/Documentation/IO-mapping.txt +++ b/Documentation/IO-mapping.txt @@ -1,6 +1,6 @@ [ NOTE: The virt_to_bus() and bus_to_virt() functions have been - superseded by the functionality provided by the PCI DMA - interface (see Documentation/DMA-mapping.txt). They continue + superseded by the functionality provided by the PCI DMA interface + (see Documentation/PCI/PCI-DMA-mapping.txt). They continue to be documented below for historical purposes, but new code must not use them. --davidm 00/12/12 ] diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 3c5434c83da..5d2480d33b4 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -186,8 +186,9 @@ a virtual address mapping (unlike the earlier scheme of virtual address do not have a corresponding kernel virtual address space mapping) and low-memory pages. -Note: Please refer to DMA-mapping.txt for a discussion on PCI high mem DMA -aspects and mapping of scatter gather lists, and support for 64 bit PCI. +Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion +on PCI high mem DMA aspects and mapping of scatter gather lists, and support +for 64 bit PCI. Special handling is required only for cases where i/o needs to happen on pages at physical memory addresses beyond what the device can support. In these diff --git a/Documentation/usb/dma.txt b/Documentation/usb/dma.txt index e8b50b7de9d..cfdcd16e3ab 100644 --- a/Documentation/usb/dma.txt +++ b/Documentation/usb/dma.txt @@ -6,8 +6,9 @@ in the kernel usb programming guide (kerneldoc, from the source code). API OVERVIEW The big picture is that USB drivers can continue to ignore most DMA issues, -though they still must provide DMA-ready buffers (see DMA-mapping.txt). -That's how they've worked through the 2.4 (and earlier) kernels. +though they still must provide DMA-ready buffers (see +Documentation/PCI/PCI-DMA-mapping.txt). That's how they've worked through +the 2.4 (and earlier) kernels. OR: they can now be DMA-aware. @@ -62,8 +63,8 @@ and effects like cache-trashing can impose subtle penalties. force a consistent memory access ordering by using memory barriers. It's not using a streaming DMA mapping, so it's good for small transfers on systems where the I/O would otherwise thrash an IOMMU mapping. (See - Documentation/DMA-mapping.txt for definitions of "coherent" and "streaming" - DMA mappings.) + Documentation/PCI/PCI-DMA-mapping.txt for definitions of "coherent" and + "streaming" DMA mappings.) Asking for 1/Nth of a page (as well as asking for N pages) is reasonably space-efficient. @@ -93,7 +94,7 @@ WORKING WITH EXISTING BUFFERS Existing buffers aren't usable for DMA without first being mapped into the DMA address space of the device. However, most buffers passed to your driver can safely be used with such DMA mapping. (See the first section -of DMA-mapping.txt, titled "What memory is DMA-able?") +of Documentation/PCI/PCI-DMA-mapping.txt, titled "What memory is DMA-able?") - When you're using scatterlists, you can map everything at once. On some systems, this kicks in an IOMMU and turns the scatterlists into single diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index d98f0f4ff83..6d5e6c5630e 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -906,7 +906,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) * @dir: R/W or both. * @attrs: optional dma attributes * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, @@ -1024,7 +1024,7 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) * @dir: R/W or both. * @attrs: optional dma attributes * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, int dir, struct dma_attrs *attrs) @@ -1102,7 +1102,7 @@ EXPORT_SYMBOL(sba_unmap_single_attrs); * @size: number of bytes mapped in driver buffer. * @dma_handle: IOVA of new buffer. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ void * sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags) @@ -1165,7 +1165,7 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp * @vaddr: virtual address IOVA of "consistent" buffer. * @dma_handler: IO virtual address of "consistent" buffer. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { @@ -1420,7 +1420,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, * @dir: R/W or both. * @attrs: optional dma attributes * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, int dir, struct dma_attrs *attrs) @@ -1512,7 +1512,7 @@ EXPORT_SYMBOL(sba_map_sg_attrs); * @dir: R/W or both. * @attrs: optional dma attributes * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, int dir, struct dma_attrs *attrs) diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index 53af696f23d..da694338090 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -5,7 +5,7 @@ #include #include -/* See Documentation/DMA-mapping.txt */ +/* See Documentation/PCI/PCI-DMA-mapping.txt */ struct hppa_dma_ops { int (*dma_supported)(struct device *dev, u64 mask); void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index ccd61b9567a..df47895db82 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -2,7 +2,7 @@ ** PARISC 1.1 Dynamic DMA mapping support. ** This implementation is for PA-RISC platforms that do not support ** I/O TLBs (aka DMA address translation hardware). -** See Documentation/DMA-mapping.txt for interface definitions. +** See Documentation/PCI/PCI-DMA-mapping.txt for interface definitions. ** ** (c) Copyright 1999,2000 Hewlett-Packard Company ** (c) Copyright 2000 Grant Grundler diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 4035357f5b9..132a134d12f 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -2,8 +2,8 @@ #define _ASM_X86_DMA_MAPPING_H /* - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for - * documentation. + * IOMMU interface. See Documentation/PCI/PCI-DMA-mapping.txt and + * Documentation/DMA-API.txt for documentation. */ #include diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 00c2bcd4146..d5768b1af08 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -5,7 +5,7 @@ * This allows to use PCI devices that only support 32bit addresses on systems * with more than 4GB. * - * See Documentation/DMA-mapping.txt for the interface specification. + * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification. * * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU General Public License v2 only. diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 3fac8f81d59..a70cf16ee1a 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -668,7 +668,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) * @dev: instance of PCI owned by the driver that's asking * @mask: number of address bits this PCI device can handle * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static int sba_dma_supported( struct device *dev, u64 mask) { @@ -680,8 +680,8 @@ static int sba_dma_supported( struct device *dev, u64 mask) return(0); } - /* Documentation/DMA-mapping.txt tells drivers to try 64-bit first, - * then fall back to 32-bit if that fails. + /* Documentation/PCI/PCI-DMA-mapping.txt tells drivers to try 64-bit + * first, then fall back to 32-bit if that fails. * We are just "encouraging" 32-bit DMA masks here since we can * never allow IOMMU bypass unless we add special support for ZX1. */ @@ -706,7 +706,7 @@ static int sba_dma_supported( struct device *dev, u64 mask) * @size: number of bytes to map in driver buffer. * @direction: R/W or both. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static dma_addr_t sba_map_single(struct device *dev, void *addr, size_t size, @@ -785,7 +785,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, * @size: number of bytes mapped in driver buffer. * @direction: R/W or both. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, @@ -861,7 +861,7 @@ sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, * @size: number of bytes mapped in driver buffer. * @dma_handle: IOVA of new buffer. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static void *sba_alloc_consistent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) @@ -892,7 +892,7 @@ static void *sba_alloc_consistent(struct device *hwdev, size_t size, * @vaddr: virtual address IOVA of "consistent" buffer. * @dma_handler: IO virtual address of "consistent" buffer. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static void sba_free_consistent(struct device *hwdev, size_t size, void *vaddr, @@ -927,7 +927,7 @@ int dump_run_sg = 0; * @nents: number of entries in list * @direction: R/W or both. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, @@ -1011,7 +1011,7 @@ sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, * @nents: number of entries in list * @direction: R/W or both. * - * See Documentation/DMA-mapping.txt + * See Documentation/PCI/PCI-DMA-mapping.txt */ static void sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, diff --git a/drivers/staging/altpciechdma/altpciechdma.c b/drivers/staging/altpciechdma/altpciechdma.c index 8e2b4ca0651..f516140ca97 100644 --- a/drivers/staging/altpciechdma/altpciechdma.c +++ b/drivers/staging/altpciechdma/altpciechdma.c @@ -531,7 +531,7 @@ static int __devinit dma_test(struct ape_dev *ape, struct pci_dev *dev) goto fail; /* allocate and map coherently-cached memory for a DMA-able buffer */ - /* @see 2.6.26.2/Documentation/DMA-mapping.txt line 318 */ + /* @see Documentation/PCI/PCI-DMA-mapping.txt, near line 318 */ buffer_virt = (u8 *)pci_alloc_consistent(dev, PAGE_SIZE * 4, &buffer_bus); if (!buffer_virt) { printk(KERN_DEBUG "Could not allocate coherent DMA buffer.\n"); @@ -846,7 +846,7 @@ static int __devinit probe(struct pci_dev *dev, const struct pci_device_id *id) #if 1 // @todo For now, disable 64-bit, because I do not understand the implications (DAC!) /* query for DMA transfer */ - /* @see Documentation/DMA-mapping.txt */ + /* @see Documentation/PCI/PCI-DMA-mapping.txt */ if (!pci_set_dma_mask(dev, DMA_64BIT_MASK)) { pci_set_consistent_dma_mask(dev, DMA_64BIT_MASK); /* use 64-bit DMA */ diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h index 90edd22d343..dda47f0082e 100644 --- a/include/media/videobuf-dma-sg.h +++ b/include/media/videobuf-dma-sg.h @@ -49,7 +49,7 @@ struct scatterlist* videobuf_pages_to_sg(struct page **pages, int nr_pages, * does memory allocation too using vmalloc_32(). * * videobuf_dma_*() - * see Documentation/DMA-mapping.txt, these functions to + * see Documentation/PCI/PCI-DMA-mapping.txt, these functions to * basically the same. The map function does also build a * scatterlist for the buffer (and unmap frees it ...) * -- cgit v1.2.3-70-g09d2 From 999721ca6d0c2540341acb73ac9048cbd6b05d3a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:44:58 +0530 Subject: headers_check fix: x86, e820.h fix the following 'make headers_check' warning: usr/include/asm/e820.h:44: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/e820.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 3d8ceddbd40..00d41ce4c84 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -49,6 +49,7 @@ #define E820_RESERVED_KERN 128 #ifndef __ASSEMBLY__ +#include struct e820entry { __u64 addr; /* start of memory segment */ __u64 size; /* size of memory segment */ -- cgit v1.2.3-70-g09d2 From cef3767852a9b1a7ff4a8dfe0969e2d32eb728df Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:46:08 +0530 Subject: headers_check fix: x86, kvm.h fix the following 'make headers_check' warnings: usr/include/asm/kvm.h:9: include of is preferred over usr/include/asm/kvm.h:16: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index b95162af0bf..d2e3bf3608a 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -6,7 +6,7 @@ * */ -#include +#include #include /* Architectural interrupt line count. */ -- cgit v1.2.3-70-g09d2 From 999b697b9d8b15756e65da72c816ef4363a945a5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:47:27 +0530 Subject: headers_check fix: x86, mce.h fix the following 'make headers_check' warnings: usr/include/asm/mce.h:7: include of is preferred over usr/include/asm/mce.h:29: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/mce.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 1d6e17c2f23..32c6e17b960 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -3,8 +3,8 @@ #ifdef __x86_64__ +#include #include -#include /* * Machine Check support for x86 @@ -115,8 +115,6 @@ extern int mce_notify_user(void); #endif /* !CONFIG_X86_32 */ - - #ifdef CONFIG_X86_MCE extern void mcheck_init(struct cpuinfo_x86 *c); #else @@ -126,5 +124,4 @@ extern void stop_mce(void); extern void restart_mce(void); #endif /* __KERNEL__ */ - #endif /* _ASM_X86_MCE_H */ -- cgit v1.2.3-70-g09d2 From d122072cc079d299e5191c9cbb6162ba8791624c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:48:17 +0530 Subject: headers_check fix: x86, mtrr.h fix the following 'make headers_check' warning: usr/include/asm/mtrr.h:61: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/mtrr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 14080d22edb..a51ada8467d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -23,6 +23,7 @@ #ifndef _ASM_X86_MTRR_H #define _ASM_X86_MTRR_H +#include #include #include -- cgit v1.2.3-70-g09d2 From 420ab35eef206d147973d26db14b5618868726be Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:52:16 +0530 Subject: headers_check fix: x86, ptrace-abi.h fix the following 'make headers_check' warnings: usr/include/asm/ptrace-abi.h:86: include of is preferred over usr/include/asm/ptrace-abi.h:93: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/ptrace-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 25f1bb8fc62..8e0f8d199e0 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h @@ -83,7 +83,7 @@ #ifdef CONFIG_X86_PTRACE_BTS #ifndef __ASSEMBLY__ -#include +#include /* configuration/status structure used in PTRACE_BTS_CONFIG and PTRACE_BTS_STATUS commands. -- cgit v1.2.3-70-g09d2 From e59afe6a21dce7bb3c63ba4f894a3195ae3d5529 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:53:49 +0530 Subject: headers_check fix: x86, sigcontext.h fix the following 'make headers_check' warnings: usr/include/asm/sigcontext.h:5: include of is preferred over usr/include/asm/sigcontext.h:24: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/sigcontext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 0afcb5e58ac..ec666491aaa 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -2,7 +2,7 @@ #define _ASM_X86_SIGCONTEXT_H #include -#include +#include #define FP_XSTATE_MAGIC1 0x46505853U #define FP_XSTATE_MAGIC2 0x46505845U -- cgit v1.2.3-70-g09d2 From 2de548faa78c650bb20c4680ee3a225cca33a45d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:55:20 +0530 Subject: headers_check fix: x86, sigcontext32.h fix the following 'make headers_check' warning: usr/include/asm/sigcontext32.h:20: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/sigcontext32.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/asm/sigcontext32.h index 6126188cf3a..ad1478c4ae1 100644 --- a/arch/x86/include/asm/sigcontext32.h +++ b/arch/x86/include/asm/sigcontext32.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H +#include + /* signal context for 32bit programs. */ #define X86_FXSR_MAGIC 0x0000 -- cgit v1.2.3-70-g09d2 From 7cff3608d2553a045b676fa81b0cf54e4f2cc5ce Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 30 Jan 2009 22:57:38 +0530 Subject: headers_check fix: x86, swab.h fix the following 'make headers_check' warnings: usr/include/asm/swab.h:4: include of is preferred over usr/include/asm/swab.h:7: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/swab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/asm/swab.h index 306d4178ffc..557cd9f0066 100644 --- a/arch/x86/include/asm/swab.h +++ b/arch/x86/include/asm/swab.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_SWAB_H #define _ASM_X86_SWAB_H -#include +#include #include static inline __attribute_const__ __u32 __arch_swab32(__u32 val) -- cgit v1.2.3-70-g09d2 From 7fc49f19813030f2e15ad2ccec5cb701f7f4a3ec Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 27 Jan 2009 21:45:57 +0100 Subject: x86 setup: fix asm constraints in vesa_store_edid Impact: fix potential miscompile (currently believed non-manifest) As the comment explains, the VBE DDC call can clobber any register. Tell the compiler about that fact. Signed-off-by: Andreas Schwab Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/boot/video-vesa.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 75115849af3..4a58c8ce3f6 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -269,9 +269,8 @@ void vesa_store_edid(void) we genuinely have to assume all registers are destroyed here. */ asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" - : "+a" (ax), "+b" (bx) - : "c" (cx), "D" (di) - : "esi"); + : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) + : : "esi", "edx"); if (ax != 0x004f) return; /* No EDID */ @@ -285,9 +284,9 @@ void vesa_store_edid(void) dx = 0; /* EDID block number */ di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ asm(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) - : "c" (cx), "D" (di) - : "esi"); + : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), + "+c" (cx), "+D" (di) + : : "esi"); #endif /* CONFIG_FIRMWARE_EDID */ } -- cgit v1.2.3-70-g09d2 From 92ab78315c638515d0e81b0c70b2082f713582d9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 31 Jan 2009 17:24:43 +0100 Subject: x86/Voyager: make it build and boot [ mingo@elte.hu: these fixes are a subset of changes cherry-picked from: git://git.kernel.org:/pub/scm/linux/kernel/git/jejb/voyager-2.6.git They fix various problems that recent x86 changes caused in the Voyager subarchitecture: both APIC changes and cpumask changes and certain cleanups caused subarch assumptions to break. Most of these changes are obsolete as the subarch code has been removed from the x86 development tree - but we merge them upstream to make Voyager build and boot. ] Signed-off-by: James Bottomley Signed-off-by: Ingo Molnar --- arch/x86/kernel/irqinit_32.c | 12 ------------ arch/x86/mach-default/setup.c | 12 ++++++++++++ arch/x86/mach-voyager/setup.c | 12 +++++++++++- arch/x86/mach-voyager/voyager_smp.c | 25 ++++++++++++------------- 4 files changed, 35 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1507ad4e674..10a09c2f182 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -78,15 +78,6 @@ void __init init_ISA_irqs(void) } } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, [IRQ0_VECTOR] = 0, @@ -178,9 +169,6 @@ void __init native_init_IRQ(void) alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif - if (!acpi_ioapic) - setup_irq(2, &irq2); - /* setup after call gates are initialised (usually add in * the architecture specific gates) */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index df167f26562..a265a7c6319 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -38,6 +38,15 @@ void __init pre_intr_init_hook(void) init_ISA_irqs(); } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /** * intr_init_hook - post gate setup interrupt initialisation * @@ -53,6 +62,9 @@ void __init intr_init_hook(void) if (x86_quirks->arch_intr_init()) return; } + if (!acpi_ioapic) + setup_irq(2, &irq2); + } /** diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index a580b9562e7..d914a7996a6 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -33,13 +33,23 @@ void __init intr_init_hook(void) setup_irq(2, &irq2); } -void __init pre_setup_arch_hook(void) +static void voyager_disable_tsc(void) { /* Voyagers run their CPUs from independent clocks, so disable * the TSC code because we can't sync them */ setup_clear_cpu_cap(X86_FEATURE_TSC); } +void __init pre_setup_arch_hook(void) +{ + voyager_disable_tsc(); +} + +void __init pre_time_init_hook(void) +{ + voyager_disable_tsc(); +} + void __init trap_init_hook(void) { } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9840b7ec749..7ffcdeec463 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -81,7 +81,7 @@ static void enable_local_vic_irq(unsigned int irq); static void disable_local_vic_irq(unsigned int irq); static void before_handle_vic_irq(unsigned int irq); static void after_handle_vic_irq(unsigned int irq); -static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask); +static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask); static void ack_vic_irq(unsigned int irq); static void vic_enable_cpi(void); static void do_boot_cpu(__u8 cpuid); @@ -211,8 +211,6 @@ static __u32 cpu_booted_map; static cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* This is for the new dynamic CPU boot code */ -cpumask_t cpu_callin_map = CPU_MASK_NONE; -cpumask_t cpu_callout_map = CPU_MASK_NONE; /* The per processor IRQ masks (these are usually kept in sync) */ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; @@ -378,7 +376,7 @@ void __init find_smp_config(void) cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24; - cpu_possible_map = phys_cpu_present_map; + init_cpu_possible(&phys_cpu_present_map); printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]); /* Here we set up the VIC to enable SMP */ @@ -1599,16 +1597,16 @@ static void after_handle_vic_irq(unsigned int irq) * change the mask and then do an interrupt enable CPI to re-enable on * the selected processors */ -void set_vic_irq_affinity(unsigned int irq, cpumask_t mask) +void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask) { /* Only extended processors handle interrupts */ unsigned long real_mask; unsigned long irq_mask = 1 << irq; int cpu; - real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors; + real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors; - if (cpus_addr(mask)[0] == 0) + if (cpus_addr(*mask)[0] == 0) /* can't have no CPUs to accept the interrupt -- extremely * bad things will happen */ return; @@ -1750,10 +1748,11 @@ static void __cpuinit voyager_smp_prepare_boot_cpu(void) init_gdt(smp_processor_id()); switch_to_new_gdt(); - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); - cpu_set(smp_processor_id(), cpu_possible_map); - cpu_set(smp_processor_id(), cpu_present_map); + cpu_online_map = cpumask_of_cpu(smp_processor_id()); + cpu_callout_map = cpumask_of_cpu(smp_processor_id()); + cpu_callin_map = CPU_MASK_NONE; + cpu_present_map = cpumask_of_cpu(smp_processor_id()); + } static int __cpuinit voyager_cpu_up(unsigned int cpu) @@ -1783,9 +1782,9 @@ void __init smp_setup_processor_id(void) x86_write_percpu(cpu_number, hard_smp_processor_id()); } -static void voyager_send_call_func(cpumask_t callmask) +static void voyager_send_call_func(const struct cpumask *callmask) { - __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id()); + __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id()); send_CPI(mask, VIC_CALL_FUNCTION_CPI); } -- cgit v1.2.3-70-g09d2 From 9a8ecae87a2b698964b1db9ea504ba1099f479fc Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 31 Jan 2009 20:12:14 -0500 Subject: x86: add cache descriptors for Intel Core i7 Signed-off-by: Dave Jones Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be7..da299eb85fc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -36,8 +36,11 @@ static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ + { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ @@ -85,6 +88,18 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ + { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ + { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ + { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ + { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ + { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ + { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ + { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ + { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x00, 0, 0} }; -- cgit v1.2.3-70-g09d2 From 10b888d6cec2688e65e9e128b14bf98ecd199da2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 31 Jan 2009 14:50:07 -0800 Subject: irq, x86: fix lock status with numa_migrate_irq_desc Eric Paris reported: > I have an hp dl785g5 which is unable to successfully run > 2.6.29-0.66.rc3.fc11.x86_64 or 2.6.29-rc2-next-20090126. During bootup > (early in userspace daemons starting) I get the below BUG, which quickly > renders the machine dead. I assume it is because sparse_irq_lock never > gets released when the BUG kills that task. Adjust lock sequence when migrating a descriptor with CONFIG_NUMA_MIGRATE_IRQ_DESC enabled. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 5 +++-- kernel/irq/numa_migrate.c | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..9b0c480c383 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2528,14 +2528,15 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); + } } #else static inline void irq_complete_move(struct irq_desc **descp) {} diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ecf765c6a77..acd88356ac7 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = irq_desc_ptrs[irq]; if (desc && old_desc != desc) - goto out_unlock; + goto out_unlock; node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); @@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; + spin_unlock_irqrestore(&sparse_irq_lock, flags); /* free the old one */ free_one_irq_desc(old_desc, desc); + spin_unlock(&old_desc->lock); kfree(old_desc); + spin_lock(&desc->lock); + + return desc; out_unlock: spin_unlock_irqrestore(&sparse_irq_lock, flags); -- cgit v1.2.3-70-g09d2 From 8d4b4981195849dd50ed94be33ede926c6f41dcd Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:48:33 +0530 Subject: headers_check fix: x86, prctl.h fix the following 'make headers_check' warning: usr/include/asm/prctl.h:10: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/prctl.h | 4 ---- arch/x86/include/asm/syscalls.h | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index a8894647dd9..3ac5032fae0 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h @@ -6,8 +6,4 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_X86_64 -extern long sys_arch_prctl(int, unsigned long); -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index c0b0bda754e..e26d34b0bc7 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -74,6 +74,7 @@ asmlinkage long sys_vfork(struct pt_regs *); asmlinkage long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); +long sys_arch_prctl(int, unsigned long); /* kernel/ioport.c */ asmlinkage long sys_iopl(unsigned int, struct pt_regs *); -- cgit v1.2.3-70-g09d2 From 15c554439faedfa490389b31db893dc764245e88 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:59:19 +0530 Subject: headers_check fix: x86, setup.h fix the following 'make headers_check' warning: usr/include/asm/setup.h:16: extern's make no sense in userspace usr/include/asm/setup.h:17: extern's make no sense in userspace usr/include/asm/setup.h:23: extern's make no sense in userspace usr/include/asm/setup.h:24: extern's make no sense in userspace usr/include/asm/setup.h:51: extern's make no sense in userspace usr/include/asm/setup.h:52: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- arch/x86/include/asm/setup.h | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a..5a3a1371575 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -5,23 +5,6 @@ #ifndef __ASSEMBLY__ -/* Interrupt control for vSMPowered x86_64 systems */ -void vsmp_init(void); - - -void setup_bios_corruption_check(void); - - -#ifdef CONFIG_X86_VISWS -extern void visws_early_detect(void); -extern int is_visws_box(void); -#else -static inline void visws_early_detect(void) { } -static inline int is_visws_box(void) { return 0; } -#endif - -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); -extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); /* * Any setup quirks to be performed? */ @@ -48,12 +31,6 @@ struct x86_quirks { int (*update_genapic)(void); }; -extern struct x86_quirks *x86_quirks; -extern unsigned long saved_video_mode; - -#ifndef CONFIG_PARAVIRT -#define paravirt_post_allocator_init() do {} while (0) -#endif #endif /* __ASSEMBLY__ */ #ifdef __KERNEL__ @@ -78,6 +55,28 @@ extern unsigned long saved_video_mode; #ifndef __ASSEMBLY__ #include +/* Interrupt control for vSMPowered x86_64 systems */ +void vsmp_init(void); + +void setup_bios_corruption_check(void); + +#ifdef CONFIG_X86_VISWS +extern void visws_early_detect(void); +extern int is_visws_box(void); +#else +static inline void visws_early_detect(void) { } +static inline int is_visws_box(void) { return 0; } +#endif + +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); +extern struct x86_quirks *x86_quirks; +extern unsigned long saved_video_mode; + +#ifndef CONFIG_PARAVIRT +#define paravirt_post_allocator_init() do {} while (0) +#endif + #ifndef _SETUP /* -- cgit v1.2.3-70-g09d2 From a67798cd7bb130bf37f5ffb28f3260f4c10232db Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Fri, 30 Jan 2009 10:50:54 -0600 Subject: x86: push old stack address on irqstack for unwinder Impact: Fixes dumpstack and KDB on 64 bits This re-adds the old stack pointer to the top of the irqstack to help with unwinding. It was removed in commit d99015b1abbad743aa049b439c1e1dede6d0fa49 as part of the save_args out-of-line work. Both dumpstack and KDB require this information. Signed-off-by: Martin Hicks Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a98779..a1346217e43 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -346,6 +346,7 @@ ENTRY(save_args) popq_cfi %rax /* move return address... */ mov %gs:pda_irqstackptr,%rsp EMPTY_FRAME 0 + pushq_cfi %rbp /* backlink for unwinder */ pushq_cfi %rax /* ... to the new stack */ /* * We entered an interrupt context - irqs are off: -- cgit v1.2.3-70-g09d2 From 06fc732c33a7ff5e4c91bcf4a6ca86b5e335ad9a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 3 Feb 2009 16:01:46 -0800 Subject: xen: disable interrupts before saving in percpu Impact: Fix race condition xen_mc_batch has a small preempt race where it takes the address of a percpu variable immediately before disabling interrupts, thereby leaving a small window in which we may migrate to another cpu and save the flags in the wrong percpu variable. Disable interrupts before saving the old flags in a percpu. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/multicalls.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 85893824161..fa3e10725d9 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -19,8 +19,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); paired with xen_mc_issue() */ static inline void xen_mc_batch(void) { + unsigned long flags; /* need to disable interrupts until this entry is complete */ - local_irq_save(__get_cpu_var(xen_mc_irq_flags)); + local_irq_save(flags); + __get_cpu_var(xen_mc_irq_flags) = flags; } static inline struct multicall_space xen_mc_entry(size_t args) -- cgit v1.2.3-70-g09d2 From 858770619debfb9269add63e4ba8b7c6b5538dd1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Feb 2009 16:24:22 +0100 Subject: x86: APIC: enable workaround on AMD Fam10h CPUs Impact: fix to enable APIC for AMD Fam10h on chipsets with a missing/b0rked ACPI MP table (MADT) Booting a 32bit kernel on an AMD Fam10h CPU running on chipsets with missing/b0rked MP table leads to a hang pretty early in the boot process due to the APIC not being initialized. Fix that by falling back to the default APIC base address in 32bit code, as it is done in the 64bit codepath. Signed-off-by: Borislav Petkov Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 4b6df2469fe..115449f869e 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1436,7 +1436,7 @@ static int __init detect_init_APIC(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || - (boot_cpu_data.x86 == 15)) + (boot_cpu_data.x86 >= 15)) break; goto no_apic; case X86_VENDOR_INTEL: -- cgit v1.2.3-70-g09d2 From 62663ea8220366472fe20462831f2d69d7987439 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 3 Feb 2009 17:46:46 +0100 Subject: ACPI: cpufreq: Remove deprecated /proc/acpi/processor/../performance proc entries They were long enough set deprecated... Update Documentation/cpu-freq/users-guide.txt: The deprecated files listed there seen not to exist for some time anymore already. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- Documentation/cpu-freq/user-guide.txt | 16 ------ arch/x86/kernel/cpu/cpufreq/Kconfig | 11 ---- drivers/acpi/processor_perflib.c | 105 ---------------------------------- 3 files changed, 132 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index e3443ddcfb8..917918f84fc 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -195,19 +195,3 @@ scaling_setspeed. By "echoing" a new frequency into this you can change the speed of the CPU, but only within the limits of scaling_min_freq and scaling_max_freq. - - -3.2 Deprecated Interfaces -------------------------- - -Depending on your kernel configuration, you might find the following -cpufreq-related files: -/proc/cpufreq -/proc/sys/cpu/*/speed -/proc/sys/cpu/*/speed-min -/proc/sys/cpu/*/speed-max - -These are files for deprecated interfaces to cpufreq, which offer far -less functionality. Because of this, these interfaces aren't described -here. - diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index efae3b22a0f..65792c2cc46 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -245,17 +245,6 @@ config X86_E_POWERSAVER comment "shared options" -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - config X86_SPEEDSTEP_LIB tristate default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 846e227592d..9cc769b587f 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -31,14 +31,6 @@ #include #include -#ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF -#include -#include -#include - -#include -#endif - #ifdef CONFIG_X86 #include #endif @@ -434,96 +426,6 @@ int acpi_processor_notify_smm(struct module *calling_module) EXPORT_SYMBOL(acpi_processor_notify_smm); -#ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF -/* /proc/acpi/processor/../performance interface (DEPRECATED) */ - -static int acpi_processor_perf_open_fs(struct inode *inode, struct file *file); -static struct file_operations acpi_processor_perf_fops = { - .owner = THIS_MODULE, - .open = acpi_processor_perf_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int acpi_processor_perf_seq_show(struct seq_file *seq, void *offset) -{ - struct acpi_processor *pr = seq->private; - int i; - - - if (!pr) - goto end; - - if (!pr->performance) { - seq_puts(seq, "\n"); - goto end; - } - - seq_printf(seq, "state count: %d\n" - "active state: P%d\n", - pr->performance->state_count, pr->performance->state); - - seq_puts(seq, "states:\n"); - for (i = 0; i < pr->performance->state_count; i++) - seq_printf(seq, - " %cP%d: %d MHz, %d mW, %d uS\n", - (i == pr->performance->state ? '*' : ' '), i, - (u32) pr->performance->states[i].core_frequency, - (u32) pr->performance->states[i].power, - (u32) pr->performance->states[i].transition_latency); - - end: - return 0; -} - -static int acpi_processor_perf_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, acpi_processor_perf_seq_show, - PDE(inode)->data); -} - -static void acpi_cpufreq_add_file(struct acpi_processor *pr) -{ - struct acpi_device *device = NULL; - - - if (acpi_bus_get_device(pr->handle, &device)) - return; - - /* add file 'performance' [R/W] */ - proc_create_data(ACPI_PROCESSOR_FILE_PERFORMANCE, S_IFREG | S_IRUGO, - acpi_device_dir(device), - &acpi_processor_perf_fops, acpi_driver_data(device)); - return; -} - -static void acpi_cpufreq_remove_file(struct acpi_processor *pr) -{ - struct acpi_device *device = NULL; - - - if (acpi_bus_get_device(pr->handle, &device)) - return; - - /* remove file 'performance' */ - remove_proc_entry(ACPI_PROCESSOR_FILE_PERFORMANCE, - acpi_device_dir(device)); - - return; -} - -#else -static void acpi_cpufreq_add_file(struct acpi_processor *pr) -{ - return; -} -static void acpi_cpufreq_remove_file(struct acpi_processor *pr) -{ - return; -} -#endif /* CONFIG_X86_ACPI_CPUFREQ_PROC_INTF */ - static int acpi_processor_get_psd(struct acpi_processor *pr) { int result = 0; @@ -747,14 +649,12 @@ err_ret: } EXPORT_SYMBOL(acpi_processor_preregister_performance); - int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu) { struct acpi_processor *pr; - if (!(acpi_processor_ppc_status & PPC_REGISTERED)) return -EINVAL; @@ -781,8 +681,6 @@ acpi_processor_register_performance(struct acpi_processor_performance return -EIO; } - acpi_cpufreq_add_file(pr); - mutex_unlock(&performance_mutex); return 0; } @@ -795,7 +693,6 @@ acpi_processor_unregister_performance(struct acpi_processor_performance { struct acpi_processor *pr; - mutex_lock(&performance_mutex); pr = per_cpu(processors, cpu); @@ -808,8 +705,6 @@ acpi_processor_unregister_performance(struct acpi_processor_performance kfree(pr->performance->states); pr->performance = NULL; - acpi_cpufreq_remove_file(pr); - mutex_unlock(&performance_mutex); return; -- cgit v1.2.3-70-g09d2 From 48ec4d9537282a55d602136724f069faafcac8c8 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 4 Feb 2009 15:54:45 -0500 Subject: x86, 64-bit: print DMI info in the oops trace This patch echoes what we already do on 32-bit since 90f7d25c6b672137344f447a30a9159945ffea72, and prints the DMI product name in show_regs, so that system specific problems can be easily identified. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4..85b4cb5c198 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; + const char *board; printk("\n"); print_modules(); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, -- cgit v1.2.3-70-g09d2 From 36723bfe81f374fd8abc05a46f10a7cac2f01a75 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 4 Feb 2009 21:44:04 +0100 Subject: x86/Kconfig.cpu: make Kconfig help readable in the console Impact: cleanup Some lines exceed the 80 char width making them unreadable. Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845a..c98d52e8296 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -167,9 +167,9 @@ config MK7 config MK8 bool "Opteron/Athlon64/Hammer/K8" help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. config MCRUSOE bool "Crusoe" @@ -256,9 +256,11 @@ config MPSC config MCORE2 bool "Core 2/newer Xeon" help - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) - CPUs. You can distinguish newer from older Xeons by the CPU family - in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and + 53xx) CPUs. You can distinguish newer from older Xeons by the CPU + family in /proc/cpuinfo. Newer ones have 6 and older ones 15 + (not a typo) config GENERIC_CPU bool "Generic-x86-64" @@ -320,14 +322,14 @@ config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 help - Old PentiumPro multiprocessor systems had errata that could cause memory - operations to violate the x86 ordering standard in rare cases. Enabling this - option will attempt to work around some (but not all) occurances of - this problem, at the cost of much heavier spinlock and memory barrier - operations. + Old PentiumPro multiprocessor systems had errata that could cause + memory operations to violate the x86 ordering standard in rare cases. + Enabling this option will attempt to work around some (but not all) + occurances of this problem, at the cost of much heavier spinlock and + memory barrier operations. - If unsure, say n here. Even distro kernels should think twice before enabling - this: there are few systems, and an unlikely bug. + If unsure, say n here. Even distro kernels should think twice before + enabling this: there are few systems, and an unlikely bug. config X86_F00F_BUG def_bool y -- cgit v1.2.3-70-g09d2 From a6a95406c676ffe4f9dee708eb404a17c69f7fdd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: x86: fix hpet timer reinit for x86_64 There's a small problem with hpet_rtc_reinit function - it checks for the: hpet_readl(HPET_COUNTER) - hpet_t1_cmp > 0 to continue increasing both the HPET_T1_CMP (register) and the hpet_t1_cmp (variable). But since the HPET_COUNTER is always 32-bit, if the hpet_t1_cmp is 64-bit this condition will always be FALSE once the latter hits the 32-bit boundary, and we can have a situation, when we don't increase the HPET_T1_CMP register high enough. The result - timer stops ticking, since HPET_T1_CMP becomes less, than the COUNTER and never increased again. The solution is (based on Linus's suggestion) to not compare 64-bits (on 64-bit x86), but to do the comparison on 32-bit signed integers. Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8ad..c761f914430 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1075,7 +1075,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) -- cgit v1.2.3-70-g09d2 From 4560839939f4b4a96e21e80584f87308ac93c1da Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 4 Feb 2009 16:44:01 -0700 Subject: x86: fix grammar in user-visible BIOS warning Fix user-visible grammo. Signed-off-by: Alex Chiang Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf6..c461f6d6907 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS may corrupt low RAM, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working around it.\n", d->ident); e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); -- cgit v1.2.3-70-g09d2 From b534816b552d35bbd3c60702139ed5c7da2f55c2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Feb 2009 18:33:38 -0800 Subject: x86: don't apply __supported_pte_mask to non-present ptes On an x86 system which doesn't support global mappings, __supported_pte_mask has _PAGE_GLOBAL clear, to make sure it never appears in the PTE. pfn_pte() and so on will enforce it with: static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & __supported_pte_mask); } However, we overload _PAGE_GLOBAL with _PAGE_PROTNONE on non-present ptes to distinguish them from swap entries. However, applying __supported_pte_mask indiscriminately will clear the bit and corrupt the pte. I guess the best fix is to only apply __supported_pte_mask to present ptes. This seems like the right solution to me, as it means we can completely ignore the issue of overlaps between the present pte bits and the non-present pte-as-swap entry use of the bits. __supported_pte_mask contains the set of flags we support on the current hardware. We also use bits in the pte for things like logically present ptes with no permissions, and swap entries for swapped out pages. We should only apply __supported_pte_mask to present ptes, because otherwise we may destroy other information being stored in the ptes. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 26 ++++++++++++++++++++------ arch/x86/include/asm/xen/page.h | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9..4f5af8447d5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -302,16 +302,30 @@ static inline pte_t pte_mkspecial(pte_t pte) extern pteval_t __supported_pte_mask; +/* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. + */ +static inline pgprotval_t massage_pgprot(pgprot_t pgprot) +{ + pgprotval_t protval = pgprot_val(pgprot); + + if (protval & _PAGE_PRESENT) + protval &= __supported_pte_mask; + + return protval; +} + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -323,7 +337,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * the newprot (if present): */ val &= _PAGE_CHG_MASK; - val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; return __pte(val); } @@ -339,7 +353,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) -#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(unsigned long flags, unsigned long new_flags) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 7ef617ef1df..4bd990ee43d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -137,7 +137,7 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) pte_t pte; pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | - (pgprot_val(pgprot) & __supported_pte_mask); + massage_pgprot(pgprot); return pte; } -- cgit v1.2.3-70-g09d2 From 0cd5c3c80a0ebd68c08312fa7d8c13149cc61c4c Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 4 Feb 2009 14:29:19 -0800 Subject: x86: disable intel_iommu support by default Due to recurring issues with DMAR support on certain platforms. There's a number of filesystem corruption incidents reported: https://bugzilla.redhat.com/show_bug.cgi?id=479996 http://bugzilla.kernel.org/show_bug.cgi?id=12578 Provide a Kconfig option to change whether it is enabled by default. If disabled, it can still be reenabled by passing intel_iommu=on to the kernel. Keep the .config option off by default. Signed-off-by: Kyle McMartin Signed-off-by: Andrew Morton Acked-By: David Woodhouse Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/Kconfig | 11 +++++++++++ drivers/pci/intel-iommu.c | 14 +++++++++++--- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d8362cf9909..b182626739e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -937,6 +937,8 @@ and is between 256 and 4096 characters. It is defined in the file intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option + on + Enable intel iommu driver. off Disable intel iommu driver. igfx_off [Default Off] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73f7fe8fd4d..9c39095b33f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1802,6 +1802,17 @@ config DMAR and include PCI device scope covered by these DMA remapping devices. +config DMAR_DEFAULT_ON + def_bool n + prompt "Enable DMA Remapping Devices by default" + depends on DMAR + help + Selecting this option will enable a DMAR device at boot time if + one is found. If this option is not selected, DMAR support can + be enabled by passing intel_iommu=on to the kernel. It is + recommended you say N here while the DMAR code remains + experimental. + config DMAR_GFX_WA def_bool y prompt "Support for Graphics workaround" diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3dfecb20d5e..f4b7c79023f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -268,7 +268,12 @@ static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); -int dmar_disabled; +#ifdef CONFIG_DMAR_DEFAULT_ON +int dmar_disabled = 0; +#else +int dmar_disabled = 1; +#endif /*CONFIG_DMAR_DEFAULT_ON*/ + static int __initdata dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; @@ -284,9 +289,12 @@ static int __init intel_iommu_setup(char *str) if (!str) return -EINVAL; while (*str) { - if (!strncmp(str, "off", 3)) { + if (!strncmp(str, "on", 2)) { + dmar_disabled = 0; + printk(KERN_INFO "Intel-IOMMU: enabled\n"); + } else if (!strncmp(str, "off", 3)) { dmar_disabled = 1; - printk(KERN_INFO"Intel-IOMMU: disabled\n"); + printk(KERN_INFO "Intel-IOMMU: disabled\n"); } else if (!strncmp(str, "igfx_off", 8)) { dmar_map_gfx = 0; printk(KERN_INFO -- cgit v1.2.3-70-g09d2 From 732553e567c2700ba5b9bccc6ec885c75779a94b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 3 Feb 2009 17:46:43 +0100 Subject: [CPUFREQ] powernow-k8: Get transition latency from ACPI _PSS table At this time, the PowerNow! driver for K8 uses an experimentally derived formula to calculate transition latency. The value it provides is orders of magnitude too large on modern systems. This patch replaces the formula with ACPI _PSS latency values for more accuracy and better performance. I've tested it on two 2nd generation Opteron systems, a 3rd generation Operton system, and a Turion X2 without seeing any stability problems. Signed-off-by: Mark Langsdorf Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 5c28b37dea1..fb039cd345d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) free_cpumask_var(data->acpi_data.shared_cpu_map); } +static int get_transition_latency(struct powernow_k8_data *data) +{ + int max_latency = 0; + int i; + for (i = 0; i < data->acpi_data.state_count; i++) { + int cur_latency = data->acpi_data.states[i].transition_latency + + data->acpi_data.states[i].bus_master_latency; + if (cur_latency > max_latency) + max_latency = cur_latency; + } + /* value in usecs, needs to be in nanoseconds */ + return 1000 * max_latency; +} + #else static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ @@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (rc) { goto err_out; } - } + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = ( + ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + ((1 << data->irt) * 30)) * 1000; + } else /* ACPI _PSS objects available */ + pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); data->available_cores = pol->cpus; - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else -- cgit v1.2.3-70-g09d2 From 9be260a646bf76fa418ee519afa10196b3164681 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Feb 2009 17:12:39 -0500 Subject: prevent kprobes from catching spurious page faults Prevent kprobes from catching spurious faults which will cause infinite recursive page-fault and memory corruption by stack overflow. Signed-off-by: Masami Hiramatsu Cc: [2.6.28.x] Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 90dfae511a4..c76ef1d701c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) si_code = SEGV_MAPERR; - if (notify_page_fault(regs)) - return; if (unlikely(kmmio_fault(regs, address))) return; @@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (spurious_fault(address, error_code)) return; + /* kprobes don't want to hook the spurious faults. */ + if (notify_page_fault(regs)) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. @@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) goto bad_area_nosemaphore; } + /* kprobes don't want to hook the spurious faults. */ + if (notify_page_fault(regs)) + return; /* * It's safe to allow irq's after cr2 has been saved and the -- cgit v1.2.3-70-g09d2 From ff08f76d738d0ec0f334b187f61e160caa321d54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: x86: clean up hpet timer reinit Implement Linus's suggestion: introduce the hpet_cnt_ahead() helper function to compare hpet time values - like other wrapping counter comparisons are abstracted away elsewhere. (jiffies, ktime_t, etc.) Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c761f914430..388254f69a2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -897,13 +897,21 @@ static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; +static u32 hpet_t1_cmp; static unsigned long hpet_default_delta; static unsigned long hpet_pie_delta; static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; +/* + * Check that the hpet counter c1 is ahead of the c2 + */ +static inline int hpet_cnt_ahead(u32 c1, u32 c2) +{ + return (s32)(c2 - c1) < 0; +} + /* * Registers a IRQ handler. */ @@ -1075,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) -- cgit v1.2.3-70-g09d2 From c09249f8d1b84344eca882547afdbffee8c09d14 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 6 Feb 2009 18:15:18 -0800 Subject: x86-64: fix int $0x80 -ENOSYS return One of my past fixes to this code introduced a different new bug. When using 32-bit "int $0x80" entry for a bogus syscall number, the return value is not correctly set to -ENOSYS. This only happens when neither syscall-audit nor syscall tracing is enabled (i.e., never seen if auditd ever started). Test program: /* gcc -o int80-badsys -m32 -g int80-badsys.c Run on x86-64 kernel. Note to reproduce the bug you need auditd never to have started. */ #include #include int main (void) { long res; asm ("int $0x80" : "=a" (res) : "0" (99999)); printf ("bad syscall returns %ld\n", res); return res != -ENOSYS; } The fix makes the int $0x80 path match the sysenter and syscall paths. Reported-by: Dmitry V. Levin Signed-off-by: Roland McGrath --- arch/x86/ia32/ia32entry.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b6189..5a0d76dc56a 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -418,9 +418,9 @@ ENTRY(ia32_syscall) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys -ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + ja ia32_badsys +ia32_do_call: IA32_ARG_FIXUP call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: @@ -435,7 +435,9 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - jmp ia32_do_syscall + cmpl $(IA32_NR_syscalls-1),%eax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call END(ia32_syscall) ia32_badsys: -- cgit v1.2.3-70-g09d2 From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 6 Feb 2009 16:52:05 -0800 Subject: x86: add clflush before monitor for Intel 7400 series For Intel 7400 series CPUs, the recommendation is to use a clflush on the monitored address just before monitor and mwait pair [1]. This clflush makes sure that there are no false wakeups from mwait when the monitored address was recently written to. [1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series" section in specification update document of 7400 series http://download.intel.com/design/xeon/specupdt/32033601.pdf Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/kernel/cpu/intel.c | 3 +++ arch/x86/kernel/process.c | 6 ++++++ 3 files changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea408dcba51..7301e60dc4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -93,6 +93,7 @@ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 430e5c38a54..24ff26a38ad 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } + if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e3086..6d12f7e37f8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) @@ -194,6 +197,9 @@ static void mwait_idle(void) struct power_trace it; if (!need_resched()) { trace_power_start(&it, POWER_CSTATE, 1); + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) -- cgit v1.2.3-70-g09d2 From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 6 ++++++ arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 3 files changed, 44 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 62d14ce3cd0..bd22f2a3713 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); @@ -71,6 +72,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f43..7678f10c456 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b0c480c383..bc7ac4da90d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } /* -------------------------------------------------------------------------- -- cgit v1.2.3-70-g09d2 From 55a8ba4b7f76bebd7e8ce3f74c04b140627a1bad Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 6 Feb 2009 10:29:35 -0800 Subject: x86, vmi: put a missing paravirt_release_pmd in pgd_dtor Commit 6194ba6ff6ccf8d5c54c857600843c67aa82c407 ("x86: don't special-case pmd allocations as much") made changes to the way we handle pmd allocations, and while doing that it dropped a call to paravirt_release_pd on the pgd page from the pgd_dtor code path. As a result of this missing release, the hypervisor is now unaware of the pgd page being freed, and as a result it ends up tracking this page as a page table page. After this the guest may start using the same page for other purposes, and depending on what use the page is put to, it may result in various performance and/or functional issues ( hangs, reboots). Since this release is only required for VMI, I now release the pgd page from the (vmi)_pgd_free hook. Signed-off-by: Alok N Kataria Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/vmi_32.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2dd..bef58b4982d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -320,6 +320,16 @@ static void vmi_release_pmd(unsigned long pfn) vmi_ops.release_page(pfn, VMI_PAGE_L2); } +/* + * We use the pgd_free hook for releasing the pgd page: + */ +static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; + + vmi_ops.release_page(pfn, VMI_PAGE_L2); +} + /* * Helper macros for MMU update flags. We can defer updates until a flush * or page invalidation only if the update is to the current address space @@ -762,6 +772,7 @@ static inline int __init activate_vmi(void) if (vmi_ops.release_page) { pv_mmu_ops.release_pte = vmi_release_pte; pv_mmu_ops.release_pmd = vmi_release_pmd; + pv_mmu_ops.pgd_free = vmi_pgd_free; } /* Set linear is needed in all cases */ -- cgit v1.2.3-70-g09d2 From 914c3d630b29b07d04908eab1b246812dadd5bd6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: include correct %gs in a.out core dump Impact: dump the correct %gs into a.out core dump aout_dump_thread() read %gs but didn't include it in core dump. Fix it. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/a.out-core.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 37822206083..3c601f8224b 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -23,8 +23,6 @@ */ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) { - u16 gs; - /* changed the size calculations - should hopefully work better. lbt */ dump->magic = CMAGIC; dump->start_code = 0; @@ -57,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, gs); + savesegment(gs, dump->regs.gs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; -- cgit v1.2.3-70-g09d2 From ae6af41f5a4841f06eb92bc86ad020ad44ae2a30 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: math_emu info cleanup Impact: cleanup * Come on, struct info? s/struct info/struct math_emu_info/ * Use struct pt_regs and kernel_vm86_regs instead of defining its own register frame structure. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 29 +++++------------- arch/x86/include/asm/processor.h | 2 +- arch/x86/math-emu/fpu_entry.c | 2 +- arch/x86/math-emu/fpu_proto.h | 2 +- arch/x86/math-emu/fpu_system.h | 14 ++++----- arch/x86/math-emu/get_address.c | 63 +++++++++++++++++++--------------------- 6 files changed, 48 insertions(+), 64 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 5a65b107ad5..302492c7795 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -1,31 +1,18 @@ #ifndef _ASM_X86_MATH_EMU_H #define _ASM_X86_MATH_EMU_H +#include +#include + /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved automatically by the 80386/80486. */ -struct info { +struct math_emu_info { long ___orig_eip; - long ___ebx; - long ___ecx; - long ___edx; - long ___esi; - long ___edi; - long ___ebp; - long ___eax; - long ___ds; - long ___es; - long ___fs; - long ___orig_eax; - long ___eip; - long ___cs; - long ___eflags; - long ___esp; - long ___ss; - long ___vm86_es; /* This and the following only in vm86 mode */ - long ___vm86_ds; - long ___vm86_fs; - long ___vm86_gs; + union { + struct pt_regs regs; + struct kernel_vm86_regs vm86; + }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..3bfd5235a9e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -353,7 +353,7 @@ struct i387_soft_struct { u8 no_update; u8 rm; u8 alimit; - struct info *info; + struct math_emu_info *info; u32 entry_eip; }; diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index c7b06feb139..c268abe7225 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -659,7 +659,7 @@ static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, } } -void math_abort(struct info *info, unsigned int signal) +void math_abort(struct math_emu_info *info, unsigned int signal) { FPU_EIP = FPU_ORIG_EIP; current->thread.trap_no = 16; diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index aa49b6a0d85..51bfbb61c5b 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -52,7 +52,7 @@ extern void fst_i_(void); extern void fstp_i(void); /* fpu_entry.c */ asmlinkage extern void math_emulate(long arg); -extern void math_abort(struct info *info, unsigned int signal); +extern void math_abort(struct math_emu_info *info, unsigned int signal); /* fpu_etc.c */ extern void FPU_etc(void); /* fpu_tags.c */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 13488fa153e..6729c6a3134 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -18,7 +18,7 @@ /* This sets the pointer FPU_info to point to the argument part of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg +#define SETUP_DATA_AREA(arg) FPU_info = (struct math_emu_info *) &arg /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ @@ -38,12 +38,12 @@ #define I387 (current->thread.xstate) #define FPU_info (I387->soft.info) -#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) -#define FPU_EAX (FPU_info->___eax) -#define FPU_EFLAGS (FPU_info->___eflags) -#define FPU_EIP (FPU_info->___eip) +#define FPU_CS (*(unsigned short *) &(FPU_info->regs.cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->regs.ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->regs.ds)) +#define FPU_EAX (FPU_info->regs.ax) +#define FPU_EFLAGS (FPU_info->regs.flags) +#define FPU_EIP (FPU_info->regs.ip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) #define FPU_lookahead (I387->soft.lookahead) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index d701e2b39e4..62daa7fcc44 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -29,42 +29,39 @@ #define FPU_WRITE_BIT 0x10 static int reg_offset[] = { - offsetof(struct info, ___eax), - offsetof(struct info, ___ecx), - offsetof(struct info, ___edx), - offsetof(struct info, ___ebx), - offsetof(struct info, ___esp), - offsetof(struct info, ___ebp), - offsetof(struct info, ___esi), - offsetof(struct info, ___edi) + offsetof(struct math_emu_info, regs.ax), + offsetof(struct math_emu_info, regs.cx), + offsetof(struct math_emu_info, regs.dx), + offsetof(struct math_emu_info, regs.bx), + offsetof(struct math_emu_info, regs.sp), + offsetof(struct math_emu_info, regs.bp), + offsetof(struct math_emu_info, regs.si), + offsetof(struct math_emu_info, regs.di) }; #define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) static int reg_offset_vm86[] = { - offsetof(struct info, ___cs), - offsetof(struct info, ___vm86_ds), - offsetof(struct info, ___vm86_es), - offsetof(struct info, ___vm86_fs), - offsetof(struct info, ___vm86_gs), - offsetof(struct info, ___ss), - offsetof(struct info, ___vm86_ds) + offsetof(struct math_emu_info, regs.cs), + offsetof(struct math_emu_info, vm86.ds), + offsetof(struct math_emu_info, vm86.es), + offsetof(struct math_emu_info, vm86.fs), + offsetof(struct math_emu_info, vm86.gs), + offsetof(struct math_emu_info, regs.ss), + offsetof(struct math_emu_info, vm86.ds) }; #define VM86_REG_(x) (*(unsigned short *) \ (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) -/* This dummy, gs is not saved on the stack. */ -#define ___GS ___ds - static int reg_offset_pm[] = { - offsetof(struct info, ___cs), - offsetof(struct info, ___ds), - offsetof(struct info, ___es), - offsetof(struct info, ___fs), - offsetof(struct info, ___GS), - offsetof(struct info, ___ss), - offsetof(struct info, ___ds) + offsetof(struct math_emu_info, regs.cs), + offsetof(struct math_emu_info, regs.ds), + offsetof(struct math_emu_info, regs.es), + offsetof(struct math_emu_info, regs.fs), + offsetof(struct math_emu_info, regs.ds), /* dummy, not saved on stack */ + offsetof(struct math_emu_info, regs.ss), + offsetof(struct math_emu_info, regs.ds) }; #define PM_REG_(x) (*(unsigned short *) \ @@ -349,34 +346,34 @@ void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, } switch (rm) { case 0: - address += FPU_info->___ebx + FPU_info->___esi; + address += FPU_info->regs.bx + FPU_info->regs.si; break; case 1: - address += FPU_info->___ebx + FPU_info->___edi; + address += FPU_info->regs.bx + FPU_info->regs.di; break; case 2: - address += FPU_info->___ebp + FPU_info->___esi; + address += FPU_info->regs.bp + FPU_info->regs.si; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 3: - address += FPU_info->___ebp + FPU_info->___edi; + address += FPU_info->regs.bp + FPU_info->regs.di; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 4: - address += FPU_info->___esi; + address += FPU_info->regs.si; break; case 5: - address += FPU_info->___edi; + address += FPU_info->regs.di; break; case 6: - address += FPU_info->___ebp; + address += FPU_info->regs.bp; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 7: - address += FPU_info->___ebx; + address += FPU_info->regs.bx; break; } -- cgit v1.2.3-70-g09d2 From a5ef7ca0e2636bad0ccd07b996d775348ae2b65e Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 8 Feb 2009 17:39:58 -0500 Subject: x86: spinlocks: define dummy __raw_spin_is_contended Architectures other than mips and x86 are not using ticket spinlocks. Therefore, the contention on the lock is meaningless, since there is nobody known to be waiting on it (arguably /fairly/ unfair locks). Dummy it out to return 0 on other architectures. Signed-off-by: Kyle McMartin Acked-by: Ralf Baechle Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/mips/include/asm/spinlock.h | 1 + arch/x86/include/asm/paravirt.h | 1 + arch/x86/include/asm/spinlock.h | 1 + include/linux/spinlock.h | 5 +++++ 4 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 1a1f320c30d..0884947ebe2 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -51,6 +51,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) return (((counters >> 14) - counters) & 0x1fff) > 1; } +#define __raw_spin_is_contended __raw_spin_is_contended static inline void __raw_spin_lock(raw_spinlock_t *lock) { diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..c09a1412758 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1402,6 +1402,7 @@ static inline int __raw_spin_is_contended(struct raw_spinlock *lock) { return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) { diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da..8247e94ac6b 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -245,6 +245,7 @@ static inline int __raw_spin_is_contended(raw_spinlock_t *lock) { return __ticket_spin_is_contended(lock); } +#define __raw_spin_is_contended __raw_spin_is_contended static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index e0c0fccced4..a0c66a2e00a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -124,7 +124,12 @@ do { \ #ifdef CONFIG_GENERIC_LOCKBREAK #define spin_is_contended(lock) ((lock)->break_lock) #else + +#ifdef __raw_spin_is_contended #define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#else +#define spin_is_contended(lock) (((void)(lock), 0)) +#endif /*__raw_spin_is_contended*/ #endif /** -- cgit v1.2.3-70-g09d2 From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: fix math_emu register frame access do_device_not_available() is the handler for #NM and it declares that it takes a unsigned long and calls math_emu(), which takes a long argument and surprisingly expects the stack frame starting at the zero argument would match struct math_emu_info, which isn't true regardless of configuration in the current code. This patch makes do_device_not_available() take struct pt_regs like other exception handlers and initialize struct math_emu_info with pointer to it and pass pointer to the math_emu_info to math_emulate() like normal C functions do. This way, unless gcc makes a copy of struct pt_regs in do_device_not_available(), the register frame is correctly accessed regardless of kernel configuration or compiler used. This doesn't fix all math_emu problems but it at least gets it somewhat working. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/math_emu.h | 4 +-- arch/x86/include/asm/traps.h | 4 +-- arch/x86/kernel/traps.c | 15 ++++++---- arch/x86/math-emu/fpu_entry.c | 4 +-- arch/x86/math-emu/fpu_proto.h | 2 +- arch/x86/math-emu/fpu_system.h | 16 ++++------ arch/x86/math-emu/get_address.c | 66 ++++++++++++++++++++--------------------- 7 files changed, 55 insertions(+), 56 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 302492c7795..031f6266f42 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -11,8 +11,8 @@ struct math_emu_info { long ___orig_eip; union { - struct pt_regs regs; - struct kernel_vm86_regs vm86; + struct pt_regs *regs; + struct kernel_vm86_regs *vm86; }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2ee0a3bceed..cf3bb053da0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs *, long); +dotraplinkage void do_device_not_available(struct pt_regs); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); @@ -77,7 +77,7 @@ extern int panic_on_unrecovered_nmi; extern int kstack_depth_to_print; void math_error(void __user *); -asmlinkage void math_emulate(long); +void math_emulate(struct math_emu_info *); #ifdef CONFIG_X86_32 unsigned long patch_espfix_desc(unsigned long, unsigned long); #else diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284..7932338d7cb 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -896,7 +896,7 @@ asmlinkage void math_state_restore(void) EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); @@ -906,16 +906,19 @@ asmlinkage void math_emulate(long arg) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) +dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); + struct math_emu_info info = { }; + + conditional_sti(®s); + + info.regs = ®s; + math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(regs); + conditional_sti(®s); } #else math_state_restore(); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index c268abe7225..5d87f586f8d 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -131,7 +131,7 @@ u_char emulating = 0; static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip, overrides * override); -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { u_char FPU_modrm, byte1; unsigned short code; @@ -161,7 +161,7 @@ asmlinkage void math_emulate(long arg) RE_ENTRANT_CHECK_ON; #endif /* RE_ENTRANT_CHECKING */ - SETUP_DATA_AREA(arg); + FPU_info = info; FPU_ORIG_EIP = FPU_EIP; diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index 51bfbb61c5b..9779df436b7 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -51,7 +51,7 @@ extern void ffreep(void); extern void fst_i_(void); extern void fstp_i(void); /* fpu_entry.c */ -asmlinkage extern void math_emulate(long arg); +extern void math_emulate(struct math_emu_info *info); extern void math_abort(struct math_emu_info *info, unsigned int signal); /* fpu_etc.c */ extern void FPU_etc(void); diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 6729c6a3134..50fa0ec2c8a 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,10 +16,6 @@ #include #include -/* This sets the pointer FPU_info to point to the argument part - of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct math_emu_info *) &arg - /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ #define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) @@ -38,12 +34,12 @@ #define I387 (current->thread.xstate) #define FPU_info (I387->soft.info) -#define FPU_CS (*(unsigned short *) &(FPU_info->regs.cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->regs.ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->regs.ds)) -#define FPU_EAX (FPU_info->regs.ax) -#define FPU_EFLAGS (FPU_info->regs.flags) -#define FPU_EIP (FPU_info->regs.ip) +#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->regs->ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->regs->ds)) +#define FPU_EAX (FPU_info->regs->ax) +#define FPU_EFLAGS (FPU_info->regs->flags) +#define FPU_EIP (FPU_info->regs->ip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) #define FPU_lookahead (I387->soft.lookahead) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 62daa7fcc44..420b3b6e391 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -29,43 +29,43 @@ #define FPU_WRITE_BIT 0x10 static int reg_offset[] = { - offsetof(struct math_emu_info, regs.ax), - offsetof(struct math_emu_info, regs.cx), - offsetof(struct math_emu_info, regs.dx), - offsetof(struct math_emu_info, regs.bx), - offsetof(struct math_emu_info, regs.sp), - offsetof(struct math_emu_info, regs.bp), - offsetof(struct math_emu_info, regs.si), - offsetof(struct math_emu_info, regs.di) + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di) }; -#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) +#define REG_(x) (*(long *)(reg_offset[(x)] + (u_char *)FPU_info->regs)) static int reg_offset_vm86[] = { - offsetof(struct math_emu_info, regs.cs), - offsetof(struct math_emu_info, vm86.ds), - offsetof(struct math_emu_info, vm86.es), - offsetof(struct math_emu_info, vm86.fs), - offsetof(struct math_emu_info, vm86.gs), - offsetof(struct math_emu_info, regs.ss), - offsetof(struct math_emu_info, vm86.ds) + offsetof(struct pt_regs, cs), + offsetof(struct kernel_vm86_regs, ds), + offsetof(struct kernel_vm86_regs, es), + offsetof(struct kernel_vm86_regs, fs), + offsetof(struct kernel_vm86_regs, gs), + offsetof(struct pt_regs, ss), + offsetof(struct kernel_vm86_regs, ds) }; #define VM86_REG_(x) (*(unsigned short *) \ - (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) + (reg_offset_vm86[((unsigned)x)] + (u_char *)FPU_info->regs)) static int reg_offset_pm[] = { - offsetof(struct math_emu_info, regs.cs), - offsetof(struct math_emu_info, regs.ds), - offsetof(struct math_emu_info, regs.es), - offsetof(struct math_emu_info, regs.fs), - offsetof(struct math_emu_info, regs.ds), /* dummy, not saved on stack */ - offsetof(struct math_emu_info, regs.ss), - offsetof(struct math_emu_info, regs.ds) + offsetof(struct pt_regs, cs), + offsetof(struct pt_regs, ds), + offsetof(struct pt_regs, es), + offsetof(struct pt_regs, fs), + offsetof(struct pt_regs, ds), /* dummy, not saved on stack */ + offsetof(struct pt_regs, ss), + offsetof(struct pt_regs, ds) }; #define PM_REG_(x) (*(unsigned short *) \ - (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) + (reg_offset_pm[((unsigned)x)] + (u_char *)FPU_info->regs)) /* Decode the SIB byte. This function assumes mod != 0 */ static int sib(int mod, unsigned long *fpu_eip) @@ -346,34 +346,34 @@ void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, } switch (rm) { case 0: - address += FPU_info->regs.bx + FPU_info->regs.si; + address += FPU_info->regs->bx + FPU_info->regs->si; break; case 1: - address += FPU_info->regs.bx + FPU_info->regs.di; + address += FPU_info->regs->bx + FPU_info->regs->di; break; case 2: - address += FPU_info->regs.bp + FPU_info->regs.si; + address += FPU_info->regs->bp + FPU_info->regs->si; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 3: - address += FPU_info->regs.bp + FPU_info->regs.di; + address += FPU_info->regs->bp + FPU_info->regs->di; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 4: - address += FPU_info->regs.si; + address += FPU_info->regs->si; break; case 5: - address += FPU_info->regs.di; + address += FPU_info->regs->di; break; case 6: - address += FPU_info->regs.bp; + address += FPU_info->regs->bp; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 7: - address += FPU_info->regs.bx; + address += FPU_info->regs->bx; break; } -- cgit v1.2.3-70-g09d2 From b52af40923fc91a12e3c7152d833e0c0c6a508f6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 10 Feb 2009 09:21:07 +0100 Subject: i8327: fix outb() parameter order In i8237A_resume(), when resetting the DMA controller, the parameters to dma_outb() were mixed up. Signed-off-by: Clemens Ladisch [ cleaned up the file a tiny bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8237.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index dbd6c1d1b63..b42ca694dc6 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev) flags = claim_dma_lock(); - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); + dma_outb(0, DMA1_RESET_REG); + dma_outb(0, DMA2_RESET_REG); - for (i = 0;i < 8;i++) { + for (i = 0; i < 8; i++) { set_dma_addr(i, 0x000000); /* DMA count is a bit weird so this is not 0 */ set_dma_count(i, 1); @@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class i8237_sysdev_class = { - .name = "i8237", - .suspend = i8237A_suspend, - .resume = i8237A_resume, + .name = "i8237", + .suspend = i8237A_suspend, + .resume = i8237A_resume, }; static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, + .id = 0, + .cls = &i8237_sysdev_class, }; static int __init i8237A_init_sysfs(void) @@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void) error = sysdev_register(&device_i8237A); return error; } - device_initcall(i8237A_init_sysfs); -- cgit v1.2.3-70-g09d2 From e3944bfac961cd7fc82f3b3143c55dc375748569 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 13:07:13 -0500 Subject: tracing, x86: fix fixup section to return to original code Impact: fix to prevent a kernel crash on fault If for some reason the pointer to the parent function on the stack takes a fault, the fix up code will not return back to the original faulting code. This can lead to unpredictable results and perhaps even a kernel panic. A fault should not happen, but if it does, we should simply disable the tracer, warn, and continue running the kernel. It should not lead to a kernel crash. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1b43086b097..9d549e4fe88 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -491,13 +491,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) "1: " _ASM_MOV " (%[parent_old]), %[old]\n" "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" " movl $0, %[faulted]\n" + "3:\n" ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" + "4: movl $1, %[faulted]\n" + " jmp 3b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : [parent_replaced] "=r" (parent), [old] "=r" (old), [faulted] "=r" (faulted) -- cgit v1.2.3-70-g09d2 From f47a454db9129d2e61b224a40f4365cdd4f83042 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 11:53:23 -0500 Subject: tracing, x86: fix constraint for parent variable The constraint used for retrieving and restoring the parent function pointer is incorrect. The parent variable is a pointer, and the address of the pointer is modified by the asm statement and not the pointer itself. It is incorrect to pass it in as an output constraint since the asm will never update the pointer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9d549e4fe88..231bdd3c5b1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -488,8 +488,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) * ignore such a protection. */ asm volatile( - "1: " _ASM_MOV " (%[parent_old]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" + "1: " _ASM_MOV " (%[parent]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" " movl $0, %[faulted]\n" "3:\n" @@ -501,9 +501,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : [old] "=r" (old), [faulted] "=r" (faulted) + : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); -- cgit v1.2.3-70-g09d2 From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 11 Feb 2009 15:10:27 +0100 Subject: x86, ptrace, mm: fix double-free on race Ptrace_detach() races with __ptrace_unlink() if the traced task is reaped while detaching. This might cause a double-free of the BTS buffer. Change the ptrace_detach() path to only do the memory accounting in ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace() which will be called from __ptrace_unlink(). The fix follows a proposal from Oleg Nesterov. Reported-by: Oleg Nesterov Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 16 ++++++++++------ include/linux/mm.h | 1 + mm/mlock.c | 7 ++++++- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb..5a4c23d8989 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index e8ddc98b840..3d7fb44d7d7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,5 +1305,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); +extern void release_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 028ec482fdd..2b57f7e6039 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -657,7 +657,7 @@ void *alloc_locked_buffer(size_t size) return buffer; } -void free_locked_buffer(void *buffer, size_t size) +void release_locked_buffer(void *buffer, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -667,6 +667,11 @@ void free_locked_buffer(void *buffer, size_t size) current->mm->locked_vm -= pgsz; up_write(¤t->mm->mmap_sem); +} + +void free_locked_buffer(void *buffer, size_t size) +{ + release_locked_buffer(buffer, size); kfree(buffer); } -- cgit v1.2.3-70-g09d2 From 4f06b0436b2ddbd3b67b10e77098a6862787b3eb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 09:32:19 -0800 Subject: x86/cpa: make sure cpa is safe to call in lazy mmu mode Impact: fix race leading to crash under KVM and Xen The CPA code may be called while we're in lazy mmu update mode - for example, when using DEBUG_PAGE_ALLOC and doing a slab allocation in an interrupt handler which interrupted a lazy mmu update. In this case, the in-memory pagetable state may be out of date due to pending queued updates. We need to flush any pending updates before inspecting the page table. Similarly, we must explicitly flush any modifications CPA may have made (which comes down to flushing queued operations when flushing the TLB). Signed-off-by: Jeremy Fitzhardinge Acked-by: Marcelo Tosatti Cc: Stable Kernel Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 84ba74820ad..f664bc1c409 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -576,6 +576,13 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) else address = *cpa->vaddr; + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -854,6 +861,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); + /* + * If we've been called with lazy mmu updates enabled, then + * make sure that everything gets flushed out before we + * return. + */ + arch_flush_lazy_mmu_mode(); + out: return ret; } -- cgit v1.2.3-70-g09d2 From be03d9e8022030c16abf534e33e185bfc3d40eef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 11 Feb 2009 11:20:23 -0800 Subject: x86, pat: fix warn_on_once() while mapping 0-1MB range with /dev/mem Jeff Mahoney reported: > With Suse's hwinfo tool, on -tip: > WARNING: at arch/x86/mm/pat.c:637 reserve_pfn_range+0x5b/0x26d() reserve_pfn_range() is not tracking the memory range below 1MB as non-RAM and as such is inconsistent with similar checks in reserve_memtype() and free_memtype() Rename the pagerange_is_ram() to pat_pagerange_is_ram() and add the "track legacy 1MB region as non RAM" condition. And also, fix reserve_pfn_range() to return -EINVAL, when the pfn range is RAM. This is to be consistent with this API design. Reported-and-tested-by: Jeff Mahoney Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 1 - arch/x86/mm/ioremap.c | 19 ----------- arch/x86/mm/pat.c | 83 ++++++++++++++++++++++++--------------------- 3 files changed, 45 insertions(+), 58 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..776579119a0 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index af750ab973b..f45d5e29a72 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr) return 0; } -int pagerange_is_ram(unsigned long start, unsigned long end) -{ - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; - - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - if (page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; - } - - return ram_page; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7b61036427d..aebbf67a79d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) static struct memtype *cached_entry; static u64 cached_start; +static int pat_pagerange_is_ram(unsigned long start, unsigned long end) +{ + int ram_page = 0, not_rampage = 0; + unsigned long page_nr; + + for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); + ++page_nr) { + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && + page_is_ram(page_nr)) + ram_page = 1; + else + not_rampage = 1; + + if (ram_page == not_rampage) + return -1; + } + + return ram_page; +} + /* * For RAM pages, mark the pages as non WB memory type using * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or @@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) *new_type = actual_type; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram != 0) { - /* - * For mapping RAM pages, drivers need to call - * set_memory_[uc|wc|wb] directly, for reserve and free, before - * setting up the PTE. - */ - WARN_ON_ONCE(1); - return 0; - } + /* + * reserve_pfn_range() doesn't support RAM pages. + */ + if (is_ram != 0) + return -EINVAL; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) @@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) { int is_ram; - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) free_memtype(paddr, paddr + size); } -- cgit v1.2.3-70-g09d2 From d85cf93da66977dbc645352be1b2084a659d8a0b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 10:02:56 -0800 Subject: x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption Impact: avoid access to percpu vars in preempible context They are intended to be used whenever there's the possibility that there's some stale state which is going to be overwritten with a queued update, or to force a state change when we may be in lazy mode. Either way, we could end up calling it with preemption enabled, so wrap the functions in their own little preempt-disable section so they can be safely called in any context (though preemption should never be enabled if we're actually in a lazy state). (Move out of line to avoid #include dependencies.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/paravirt.h | 17 ++--------------- arch/x86/kernel/paravirt.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..a660eceaa27 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void) PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_cpu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } -} - +void arch_flush_lazy_cpu_mode(void); #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) @@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_mmu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } -} +void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, unsigned long phys, pgprot_t flags) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..dcba6c567a2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,30 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, -- cgit v1.2.3-70-g09d2 From 34b0900d323122113683685b200aae9f9b75e63b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:30:48 +0100 Subject: x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context Impact: Catch cases where lazy MMU state is active in a preemtible context arch_flush_lazy_mmu_cpu() has been changed to disable preemption so the checks in enter/leave will never trigger. Put the preemtible() check into arch_flush_lazy_mmu_cpu() to catch such cases. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/paravirt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dcba6c567a2..c6520a4e85d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -273,6 +273,7 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } @@ -285,6 +286,7 @@ void arch_flush_lazy_cpu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); arch_leave_lazy_cpu_mode(); arch_enter_lazy_cpu_mode(); } -- cgit v1.2.3-70-g09d2 From 7ad9de6ac83bd825996d2de98c92e0f425c31050 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 12 Feb 2009 21:16:09 +0100 Subject: x86: CPA avoid repeated lazy mmu flush Impact: Flush the lazy MMU only once Pending mmu updates only need to be flushed once to bring the in-memory pagetable state up to date. Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f664bc1c409..8ca0d8566fc 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -575,14 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; - - /* - * If we're called with lazy mmu updates enabled, the - * in-memory pte state may be stale. Flush pending updates to - * bring them up to date. - */ - arch_flush_lazy_mmu_mode(); - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -819,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; -- cgit v1.2.3-70-g09d2 From b13e24644c138d0ddbc451403c30a96b09bfd556 Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 12 Feb 2009 18:48:53 -0800 Subject: x86, hpet: fix for LS21 + HPET = boot hang Between 2.6.23 and 2.6.24-rc1 a change was made that broke IBM LS21 systems that had the HPET enabled in the BIOS, resulting in boot hangs for x86_64. Specifically commit b8ce33590687888ebb900d09557b8807c4539022, which merges the i386 and x86_64 HPET code. Prior to this commit, when we setup the HPET timers in x86_64, we did the following: hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); However after the i386/x86_64 HPET merge, we do the following: cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); However on LS21s with HPET enabled in the BIOS, the HPET_T0_CFG register boots with Level triggered interrupts (HPET_TN_LEVEL) enabled. This causes the periodic interrupt to be not so periodic, and that results in the boot time hang I reported earlier in the delay calibration. My fix: Always disable HPET_TN_LEVEL when setting up periodic mode. Signed-off-by: John Stultz Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8ad..5c8da2c2c18 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); -- cgit v1.2.3-70-g09d2 From e49590b6dd356f8ef10ba3531a29e5086f6f2e3a Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Fri, 13 Feb 2009 20:56:18 -0500 Subject: x86, olpc: fix model detection without OFW Impact: fix "garbled display, laptop is unusable" bug Commit e51a1ac2dfca9ad869471e88f828281db7e810c0 ("x86, olpc: fix endian bug in openfirmware workaround") breaks model comparison on OLPC; the value 0xc2 needs to be scaled up by olpc_board(). The pre-patch version was wrong, but accidentally worked anyway (big-endian 0xc2 is big enough to satisfy all other board revisions, but little endian 0xc2 is not). Signed-off-by: Chris Ball Cc: Andrew Morton Acked-by: Andres Salomon Cc: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/olpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 7a13fac63a1..4006c522adc 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = 0xc2; + olpc_platform_info.boardrev = olpc_board(0xc2); } #endif -- cgit v1.2.3-70-g09d2 From 7a0eb1960e8ddcb68ea631caf16815485af0e228 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Jan 2009 14:57:52 +0200 Subject: KVM: Avoid using CONFIG_ in userspace visible headers Kconfig symbols are not available in userspace, and are not stripped by headers-install. Avoid their use by adding #defines in to suit each architecture. Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 4 ++++ arch/x86/include/asm/kvm.h | 7 +++++++ include/linux/kvm.h | 10 +++++----- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 68aa6da807c..bfa86b6af7c 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -25,6 +25,10 @@ #include +/* Select x86 specific features in */ +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608a..886c9402ec4 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include #include +/* Select x86 specific features in */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5715f190760..0424326f167 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -58,10 +58,10 @@ struct kvm_irqchip { __u32 pad; union { char dummy[512]; /* reserving space */ -#ifdef CONFIG_X86 +#ifdef __KVM_HAVE_PIT struct kvm_pic_state pic; #endif -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#ifdef __KVM_HAVE_IOAPIC struct kvm_ioapic_state ioapic; #endif } chip; @@ -384,16 +384,16 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_MSI #define KVM_CAP_DEVICE_MSI 20 #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 #endif -- cgit v1.2.3-70-g09d2 From ad8ba2cd44d4d39fb3fe55d5dcc565b19fc3a7fb Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:02 +0800 Subject: KVM: Add kvm_arch_sync_events to sync with asynchronize events kvm_arch_sync_events is introduced to quiet down all other events may happen contemporary with VM destroy process, like IRQ handler and work struct for assigned device. For kvm_arch_sync_events is called at the very beginning of kvm_destroy_vm(), so the state of KVM here is legal and can provide a environment to quiet down other events. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 6 files changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4e586f6110a..28f982045f2 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2822c8ccfaa..5f81256287f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvmppc_free_vcpus(kvm); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index be8497186b9..0d33893e1e8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546a240..b0fc079f1be 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4127,6 +4127,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_all_assigned_devices(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec49d0be7f5..bf6f703642f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); +void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0b6f2f71271..68e3f1ec167 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -891,6 +891,7 @@ static void kvm_destroy_vm(struct kvm *kvm) { struct mm_struct *mm = kvm->mm; + kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); -- cgit v1.2.3-70-g09d2 From ba4cef31d5a397b64ba6d3ff713ce06c62f0c597 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:03 +0800 Subject: KVM: Fix racy in kvm_free_assigned_irq In the past, kvm_get_kvm() and kvm_put_kvm() was called in assigned device irq handler and interrupt_work, in order to prevent cancel_work_sync() in kvm_free_assigned_irq got a illegal state when waiting for interrupt_work done. But it's tricky and still got two problems: 1. A bug ignored two conditions that cancel_work_sync() would return true result in a additional kvm_put_kvm(). 2. If interrupt type is MSI, we would got a window between cancel_work_sync() and free_irq(), which interrupt would be injected again... This patch discard the reference count used for irq handler and interrupt_work, and ensure the legal state by moving the free function at the very beginning of kvm_destroy_vm(). And the patch fix the second bug by disable irq before cancel_work_sync(), which may result in nested disable of irq but OK for we are going to free it. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- virt/kvm/kvm_main.c | 27 +++++++++++++++++++-------- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0fc079f1be..fc3e329f6ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4129,11 +4129,11 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_sync_events(struct kvm *kvm) { + kvm_free_all_assigned_devices(kvm); } void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_free_all_assigned_devices(kvm); kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68e3f1ec167..277ea7f39fc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) assigned_dev->host_irq_disabled = false; } mutex_unlock(&assigned_dev->kvm->lock); - kvm_put_kvm(assigned_dev->kvm); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) @@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; - kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); disable_irq_nosync(irq); @@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) } } +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ static void kvm_free_assigned_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { @@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm, if (!assigned_dev->irq_requested_type) return; - if (cancel_work_sync(&assigned_dev->interrupt_work)) - /* We had pending work. That means we will have to take - * care of kvm_put_kvm. - */ - kvm_put_kvm(kvm); + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); free_irq(assigned_dev->host_irq, (void *)assigned_dev); -- cgit v1.2.3-70-g09d2 From d2a8284e8fca9e2a938bee6cd074064d23864886 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 30 Dec 2008 15:55:05 -0200 Subject: KVM: PIT: fix i8254 pending count read count_load_time assignment is bogus: its supposed to contain what it means, not the expiration time. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index e665d1c623c..72bd275a9b5 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) hrtimer_add_expires_ns(&pt->timer, pt->period); pt->scheduled = hrtimer_get_expires_ns(&pt->timer); if (pt->period) - ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); + ps->channels[0].count_load_time = ktime_get(); return (pt->period == 0 ? 0 : 1); } -- cgit v1.2.3-70-g09d2 From abe6655dd699069b53bcccbc65b2717f60203b12 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 10 Feb 2009 20:59:45 -0200 Subject: KVM: x86: disable kvmclock on non constant TSC hosts This is better. Currently, this code path is posing us big troubles, and we won't have a decent patch in time. So, temporarily disable it. Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc3e329f6ad..758b7a155ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: - case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; + case KVM_CAP_CLOCKSOURCE: + r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); + break; default: r = 0; break; -- cgit v1.2.3-70-g09d2 From 2aaf69dcee864f4fb6402638dd2f263324ac839f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 21 Jan 2009 16:52:16 +0800 Subject: KVM: MMU: Map device MMIO as UC in EPT Software are not allow to access device MMIO using cacheable memory type, the patch limit MMIO region with UC and WC(guest can select WC using PAT and PCD/PWT). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 9 +++++++-- arch/x86/kvm/vmx.c | 3 +-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83f11c7474a..2d4477c7147 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (largepage) spte |= PT_PAGE_SIZE_MASK; if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); + if (!kvm_is_mmio_pfn(pfn)) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + mt_mask |= VMX_EPT_IGMT_BIT; + } else + mt_mask = MTRR_TYPE_UNCACHABLE << + kvm_x86_ops->get_mt_mask_shift(); spte |= mt_mask; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6259d746764..07491c9c6ed 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3687,8 +3687,7 @@ static int __init vmx_init(void) if (vm_need_ept()) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_IGMT_BIT); + VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); -- cgit v1.2.3-70-g09d2 From b682b814e3cc340f905c14dff87ce8bdba7c5eba Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 10 Feb 2009 20:41:41 -0200 Subject: KVM: x86: fix LAPIC pending count calculation Simplify LAPIC TMCCT calculation by using hrtimer provided function to query remaining time until expiration. Fixes host hang with nested ESX. Signed-off-by: Marcelo Tosatti Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/irq.c | 7 ------ arch/x86/kvm/irq.h | 1 - arch/x86/kvm/lapic.c | 66 ++++++++++++---------------------------------------- arch/x86/kvm/lapic.h | 2 -- arch/x86/kvm/svm.c | 1 - arch/x86/kvm/vmx.c | 1 - 6 files changed, 15 insertions(+), 63 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index c019b8edcdb..cf17ed52f6f 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - kvm_apic_timer_intr_post(vcpu, vec); - /* TODO: PIT, RTC etc. */ -} -EXPORT_SYMBOL_GPL(kvm_timer_intr_post); - void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2bf32a03cee..82579ee538d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afac68c0815..f0b67f2cdd6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -35,6 +35,12 @@ #include "kvm_cache_regs.h" #include "irq.h" +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + #define PRId64 "d" #define PRIx64 "llx" #define PRIu64 "u" @@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic) { - u64 counter_passed; - ktime_t passed, now; + ktime_t remaining; + s64 ns; u32 tmcct; ASSERT(apic != NULL); - now = apic->timer.dev.base->get_time(); - tmcct = apic_get_reg(apic, APIC_TMICT); - /* if initial count is 0, current count should also be 0 */ - if (tmcct == 0) + if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - if (unlikely(ktime_to_ns(now) <= - ktime_to_ns(apic->timer.last_update))) { - /* Wrap around */ - passed = ktime_add(( { - (ktime_t) { - .tv64 = KTIME_MAX - - (apic->timer.last_update).tv64}; } - ), now); - apic_debug("time elapsed\n"); - } else - passed = ktime_sub(now, apic->timer.last_update); - - counter_passed = div64_u64(ktime_to_ns(passed), - (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); - - if (counter_passed > tmcct) { - if (unlikely(!apic_lvtt_period(apic))) { - /* one-shot timers stick at 0 until reset */ - tmcct = 0; - } else { - /* - * periodic timers reset to APIC_TMICT when they - * hit 0. The while loop simulates this happening N - * times. (counter_passed %= tmcct) would also work, - * but might be slower or not work on 32-bit?? - */ - while (counter_passed > tmcct) - counter_passed -= tmcct; - tmcct -= counter_passed; - } - } else { - tmcct -= counter_passed; - } + remaining = hrtimer_expires_remaining(&apic->timer.dev); + if (ktime_to_ns(remaining) < 0) + remaining = ktime_set(0, 0); + + ns = mod_64(ktime_to_ns(remaining), apic->timer.period); + tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); return tmcct; } @@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->timer.dev.base->get_time(); - apic->timer.last_update = now; - apic->timer.period = apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->timer.divide_count; atomic_set(&apic->timer.pending, 0); @@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) - apic->timer.last_update = ktime_add_ns( - apic->timer.last_update, - apic->timer.period); -} - int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 81858881287..45ab6ee7120 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,7 +12,6 @@ struct kvm_lapic { atomic_t pending; s64 period; /* unit: ns */ u32 divide_count; - ktime_t last_update; struct hrtimer dev; } timer; struct kvm_vcpu *vcpu; @@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1452851ae25..a9e769e4e25 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* Okay, we can deliver the interrupt: grab it and update PIC state. */ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); - kvm_timer_intr_post(vcpu, intr_vector); out: update_cr8_intercept(vcpu); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 07491c9c6ed..b1fe1422afb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3285,7 +3285,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) } if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); if (kvm_cpu_has_interrupt(vcpu)) enable_irq_window(vcpu); } -- cgit v1.2.3-70-g09d2 From 516a1a7e9dc80358030fe01aabb3bedf882db9e2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Feb 2009 02:32:07 +0200 Subject: KVM: VMX: Flush volatile msrs before emulating rdmsr Some msrs (notable MSR_KERNEL_GS_BASE) are held in the processor registers and need to be flushed to the vcpu struture before they can be read. This fixes cygwin longjmp() failure on Windows x64. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1fe1422afb..7611af57682 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: + vmx_load_host_state(to_vmx(vcpu)); msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; -- cgit v1.2.3-70-g09d2 From be716615fe596ee117292dc615e95f707fb67fd1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Jan 2009 23:36:34 +0100 Subject: x86, vm86: fix preemption bug Commit 3d2a71a596bd9c761c8487a2178e95f8a61da083 ("x86, traps: converge do_debug handlers") changed the preemption disable logic of do_debug() so vm86_handle_trap() is called with preemption disabled resulting in: BUG: sleeping function called from invalid context at include/linux/kernel.h:155 in_atomic(): 1, irqs_disabled(): 0, pid: 3005, name: dosemu.bin Pid: 3005, comm: dosemu.bin Tainted: G W 2.6.29-rc1 #51 Call Trace: [] copy_to_user+0x33/0x108 [] save_v86_state+0x65/0x149 [] handle_vm86_trap+0x20/0x8f [] do_debug+0x15b/0x1a4 [] debug_stack_correct+0x27/0x2c [] sysenter_do_call+0x12/0x2f BUG: scheduling while atomic: dosemu.bin/3005/0x10000001 Restore the original calling convention and reenable preemption before calling handle_vm86_trap(). Reported-by: Michal Suchanek Cc: stable@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7932338d7cb..a9e7548e179 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -626,8 +632,10 @@ clear_dr7: #ifdef CONFIG_X86_32 debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); + conditional_cli(regs); return; #endif -- cgit v1.2.3-70-g09d2 From 6bc5c366b1a45ca18fba6851f62db5743b3f6db5 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 3 Jan 2009 21:23:51 +0200 Subject: trace: mmiotrace to the tracer menu in Kconfig Impact: cosmetic change in Kconfig menu layout This patch was originally suggested by Peter Zijlstra, but seems it was forgotten. CONFIG_MMIOTRACE and CONFIG_MMIOTRACE_TEST were selectable directly under the Kernel hacking / debugging menu in the kernel configuration system. They were present only for x86 and x86_64. Other tracers that use the ftrace tracing framework are in their own sub-menu. This patch moves the mmiotrace configuration options there. Since the Kconfig file, where the tracer menu is, is not architecture specific, HAVE_MMIOTRACE_SUPPORT is introduced and provided only by x86/x86_64. CONFIG_MMIOTRACE now depends on it. Signed-off-by: Pekka Paalanen Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 24 ++---------------------- kernel/trace/Kconfig | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 10d6cc3fd05..e1983fa025d 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -174,28 +174,8 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PCI - select TRACING - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/tracers/mmiotrace.txt. - If you are not helping to develop drivers, say N. - -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. +config HAVE_MMIOTRACE_SUPPORT + def_bool y # # IO delay types: diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e2a4ff6fc3a..58a93fbd68a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -302,4 +302,27 @@ config FTRACE_STARTUP_TEST functioning properly. It will do tests on all the configured tracers of ftrace. +config MMIOTRACE + bool "Memory mapped IO tracing" + depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI + select TRACING + help + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. Tracing is disabled by + default and can be enabled at run-time. + + See Documentation/tracers/mmiotrace.txt. + If you are not helping to develop drivers, say N. + +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. + endmenu -- cgit v1.2.3-70-g09d2 From cb9eff097831007afb30d64373f29d99825d0068 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:36 +0000 Subject: net: new user space API for time stamping of incoming and outgoing packets User space can request hardware and/or software time stamping. Reporting of the result(s) via a new control message is enabled separately for each field in the message because some of the fields may require additional computation and thus cause overhead. User space can tell the different kinds of time stamps apart and choose what suits its needs. When a TX timestamp operation is requested, the TX skb will be cloned and the clone will be time stamped (in hardware or software) and added to the socket error queue of the skb, if the skb has a socket associated with it. The actual TX timestamp will reach userspace as a RX timestamp on the cloned packet. If timestamping is requested and no timestamping is done in the device driver (potentially this may use hardware timestamping), it will be done in software after the device's start_hard_xmit routine. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 178 +++++++ Documentation/networking/timestamping/.gitignore | 1 + Documentation/networking/timestamping/Makefile | 6 + .../networking/timestamping/timestamping.c | 533 +++++++++++++++++++++ arch/alpha/include/asm/socket.h | 3 + arch/arm/include/asm/socket.h | 3 + arch/avr32/include/asm/socket.h | 3 + arch/blackfin/include/asm/socket.h | 3 + arch/cris/include/asm/socket.h | 3 + arch/h8300/include/asm/socket.h | 3 + arch/ia64/include/asm/socket.h | 3 + arch/m68k/include/asm/socket.h | 3 + arch/mips/include/asm/socket.h | 3 + arch/parisc/include/asm/socket.h | 3 + arch/powerpc/include/asm/socket.h | 3 + arch/s390/include/asm/socket.h | 3 + arch/sh/include/asm/socket.h | 3 + arch/sparc/include/asm/socket.h | 3 + arch/x86/include/asm/socket.h | 3 + arch/xtensa/include/asm/socket.h | 3 + include/asm-frv/socket.h | 3 + include/asm-m32r/socket.h | 3 + include/asm-mn10300/socket.h | 3 + include/linux/errqueue.h | 1 + include/linux/net_tstamp.h | 104 ++++ include/linux/sockios.h | 3 + 26 files changed, 883 insertions(+) create mode 100644 Documentation/networking/timestamping.txt create mode 100644 Documentation/networking/timestamping/.gitignore create mode 100644 Documentation/networking/timestamping/Makefile create mode 100644 Documentation/networking/timestamping/timestamping.c create mode 100644 include/linux/net_tstamp.h (limited to 'arch/x86') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt new file mode 100644 index 00000000000..a681a65b5bc --- /dev/null +++ b/Documentation/networking/timestamping.txt @@ -0,0 +1,178 @@ +The existing interfaces for getting network packages time stamped are: + +* SO_TIMESTAMP + Generate time stamp for each incoming packet using the (not necessarily + monotonous!) system time. Result is returned via recv_msg() in a + control message as timeval (usec resolution). + +* SO_TIMESTAMPNS + Same time stamping mechanism as SO_TIMESTAMP, but returns result as + timespec (nsec resolution). + +* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS] + Only for multicasts: approximate send time stamp by receiving the looped + packet and using its receive time stamp. + +The following interface complements the existing ones: receive time +stamps can be generated and returned for arbitrary packets and much +closer to the point where the packet is really sent. Time stamps can +be generated in software (as before) or in hardware (if the hardware +has such a feature). + +SO_TIMESTAMPING: + +Instructs the socket layer which kind of information is wanted. The +parameter is an integer with some of the following bits set. Setting +other bits is an error and doesn't change the current state. + +SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware +SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or + fails, then do it in software +SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp + as generated by the hardware +SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or + fails, then do it in software +SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp +SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to + the system time base +SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in + software + +SOF_TIMESTAMPING_TX/RX determine how time stamps are generated. +SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the +following control message: + struct scm_timestamping { + struct timespec systime; + struct timespec hwtimetrans; + struct timespec hwtimeraw; + }; + +recvmsg() can be used to get this control message for regular incoming +packets. For send time stamps the outgoing packet is looped back to +the socket's error queue with the send time stamp(s) attached. It can +be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the +original outgoing packet data including all headers preprended down to +and including the link layer, the scm_timestamping control message and +a sock_extended_err control message with ee_errno==ENOMSG and +ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending +bounced packet is ready for reading as far as select() is concerned. + +All three values correspond to the same event in time, but were +generated in different ways. Each of these values may be empty (= all +zero), in which case no such value was available. If the application +is not interested in some of these values, they can be left blank to +avoid the potential overhead of calculating them. + +systime is the value of the system time at that moment. This +corresponds to the value also returned via SO_TIMESTAMP[NS]. If the +time stamp was generated by hardware, then this field is +empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is +set. + +hwtimeraw is the original hardware time stamp. Filled in if +SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its +relation to system time should be made. + +hwtimetrans is the hardware time stamp transformed so that it +corresponds as good as possible to system time. This correlation is +not perfect; as a consequence, sorting packets received via different +NICs by their hwtimetrans may differ from the order in which they were +received. hwtimetrans may be non-monotonic even for the same NIC. +Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support +by the network device and will be empty without that support. + + +SIOCSHWTSTAMP: + +Hardware time stamping must also be initialized for each device driver +that is expected to do hardware time stamping. The parameter is: + +struct hwtstamp_config { + int flags; /* no flags defined right now, must be zero */ + int tx_type; /* HWTSTAMP_TX_* */ + int rx_filter; /* HWTSTAMP_FILTER_* */ +}; + +Desired behavior is passed into the kernel and to a specific device by +calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose +ifr_data points to a struct hwtstamp_config. The tx_type and +rx_filter are hints to the driver what it is expected to do. If +the requested fine-grained filtering for incoming packets is not +supported, the driver may time stamp more than just the requested types +of packets. + +A driver which supports hardware time stamping shall update the struct +with the actual, possibly more permissive configuration. If the +requested packets cannot be time stamped, then nothing should be +changed and ERANGE shall be returned (in contrast to EINVAL, which +indicates that SIOCSHWTSTAMP is not supported at all). + +Only a processes with admin rights may change the configuration. User +space is responsible to ensure that multiple processes don't interfere +with each other and that the settings are reset. + +/* possible values for hwtstamp_config->tx_type */ +enum { + /* + * no outgoing packet will need hardware time stamping; + * should a packet arrive which asks for it, no hardware + * time stamping will be done + */ + HWTSTAMP_TX_OFF, + + /* + * enables hardware time stamping for outgoing packets; + * the sender of the packet decides which are to be + * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE + * before sending the packet + */ + HWTSTAMP_TX_ON, +}; + +/* possible values for hwtstamp_config->rx_filter */ +enum { + /* time stamp no incoming packet at all */ + HWTSTAMP_FILTER_NONE, + + /* time stamp any incoming packet */ + HWTSTAMP_FILTER_ALL, + + /* return value: time stamp all packets requested plus some others */ + HWTSTAMP_FILTER_SOME, + + /* PTP v1, UDP, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V1_L4_EVENT, + + ... +}; + + +DEVICE IMPLEMENTATION + +A driver which supports hardware time stamping must support the +SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored +in the skb with skb_hwtstamp_set(). + +Time stamps for outgoing packets are to be generated as follows: +- In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware() + returns non-zero. If yes, then the driver is expected + to do hardware time stamping. +- If this is possible for the skb and requested, then declare + that the driver is doing the time stamping by calling + skb_hwtstamp_tx_in_progress(). A driver not supporting + hardware time stamping doesn't do that. A driver must never + touch sk_buff::tstamp! It is used to store how time stamping + for an outgoing packets is to be done. +- As soon as the driver has sent the packet and/or obtained a + hardware time stamp for it, it passes the time stamp back by + calling skb_hwtstamp_tx() with the original skb, the raw + hardware time stamp and a handle to the device (necessary + to convert the hardware time stamp to system time). If obtaining + the hardware time stamp somehow fails, then the driver should + not fall back to software time stamping. The rationale is that + this would occur at a later time in the processing pipeline + than other software time stamping and therefore could lead + to unexpected deltas between time stamps. +- If the driver did not call skb_hwtstamp_tx_in_progress(), then + dev_hard_start_xmit() checks whether software time stamping + is wanted as fallback and potentially generates the time stamp. diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore new file mode 100644 index 00000000000..71e81eb2e22 --- /dev/null +++ b/Documentation/networking/timestamping/.gitignore @@ -0,0 +1 @@ +timestamping diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile new file mode 100644 index 00000000000..2a1489fdc03 --- /dev/null +++ b/Documentation/networking/timestamping/Makefile @@ -0,0 +1,6 @@ +CPPFLAGS = -I../../../include + +timestamping: timestamping.c + +clean: + rm -f timestamping diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c new file mode 100644 index 00000000000..43d14310421 --- /dev/null +++ b/Documentation/networking/timestamping/timestamping.c @@ -0,0 +1,533 @@ +/* + * This program demonstrates how the various time stamping features in + * the Linux kernel work. It emulates the behavior of a PTP + * implementation in stand-alone master mode by sending PTPv1 Sync + * multicasts once every second. It looks for similar packets, but + * beyond that doesn't actually implement PTP. + * + * Outgoing packets are time stamped with SO_TIMESTAMPING with or + * without hardware support. + * + * Incoming packets are time stamped with SO_TIMESTAMPING with or + * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and + * SO_TIMESTAMP[NS]. + * + * Copyright (C) 2009 Intel Corporation. + * Author: Patrick Ohly + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "asm/types.h" +#include "linux/net_tstamp.h" +#include "linux/errqueue.h" + +#ifndef SO_TIMESTAMPING +# define SO_TIMESTAMPING 37 +# define SCM_TIMESTAMPING SO_TIMESTAMPING +#endif + +#ifndef SO_TIMESTAMPNS +# define SO_TIMESTAMPNS 35 +#endif + +#ifndef SIOCGSTAMPNS +# define SIOCGSTAMPNS 0x8907 +#endif + +#ifndef SIOCSHWTSTAMP +# define SIOCSHWTSTAMP 0x89b0 +#endif + +static void usage(const char *error) +{ + if (error) + printf("invalid option: %s\n", error); + printf("timestamping interface option*\n\n" + "Options:\n" + " IP_MULTICAST_LOOP - looping outgoing multicasts\n" + " SO_TIMESTAMP - normal software time stamping, ms resolution\n" + " SO_TIMESTAMPNS - more accurate software time stamping\n" + " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" + " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" + " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" + " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" + " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" + " SOF_TIMESTAMPING_SYS_HARDWARE - request reporting of transformed HW time stamps\n" + " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SIOCGSTAMP - check last socket time stamp\n" + " SIOCGSTAMPNS - more accurate socket time stamp\n"); + exit(1); +} + +static void bail(const char *error) +{ + printf("%s: %s\n", error, strerror(errno)); + exit(1); +} + +static const unsigned char sync[] = { + 0x00, 0x01, 0x00, 0x01, + 0x5f, 0x44, 0x46, 0x4c, + 0x54, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x01, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, + 0x49, 0x05, 0xcd, 0x01, + 0x29, 0xb1, 0x8d, 0xb0, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +{ + struct timeval now; + int res; + + res = sendto(sock, sync, sizeof(sync), 0, + addr, addr_len); + gettimeofday(&now, 0); + if (res < 0) + printf("%s: %s\n", "send", strerror(errno)); + else + printf("%ld.%06ld: sent %d bytes\n", + (long)now.tv_sec, (long)now.tv_usec, + res); +} + +static void printpacket(struct msghdr *msg, int res, + char *data, + int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + struct cmsghdr *cmsg; + struct timeval tv; + struct timespec ts; + struct timeval now; + + gettimeofday(&now, 0); + + printf("%ld.%06ld: received %s data, %d bytes from %s, %d bytes control messages\n", + (long)now.tv_sec, (long)now.tv_usec, + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + res, + inet_ntoa(from_addr->sin_addr), + msg->msg_controllen); + for (cmsg = CMSG_FIRSTHDR(msg); + cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + printf(" cmsg len %d: ", cmsg->cmsg_len); + switch (cmsg->cmsg_level) { + case SOL_SOCKET: + printf("SOL_SOCKET "); + switch (cmsg->cmsg_type) { + case SO_TIMESTAMP: { + struct timeval *stamp = + (struct timeval *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMP %ld.%06ld", + (long)stamp->tv_sec, + (long)stamp->tv_usec); + break; + } + case SO_TIMESTAMPNS: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPNS %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + case SO_TIMESTAMPING: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPING "); + printf("SW %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + printf("HW transformed %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + printf("HW raw %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + case IPPROTO_IP: + printf("IPPROTO_IP "); + switch (cmsg->cmsg_type) { + case IP_RECVERR: { + struct sock_extended_err *err = + (struct sock_extended_err *)CMSG_DATA(cmsg); + printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", + strerror(err->ee_errno), + err->ee_origin, +#ifdef SO_EE_ORIGIN_TIMESTAMPING + err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? + "bounced packet" : "unexpected origin" +#else + "probably SO_EE_ORIGIN_TIMESTAMPING" +#endif + ); + if (res < sizeof(sync)) + printf(" => truncated data?!"); + else if (!memcmp(sync, data + res - sizeof(sync), + sizeof(sync))) + printf(" => GOT OUR DATA BACK (HURRAY!)"); + break; + } + case IP_PKTINFO: { + struct in_pktinfo *pktinfo = + (struct in_pktinfo *)CMSG_DATA(cmsg); + printf("IP_PKTINFO interface index %u", + pktinfo->ipi_ifindex); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + default: + printf("level %d type %d", + cmsg->cmsg_level, + cmsg->cmsg_type); + break; + } + printf("\n"); + } + + if (siocgstamp) { + if (ioctl(sock, SIOCGSTAMP, &tv)) + printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); + else + printf("SIOCGSTAMP %ld.%06ld\n", + (long)tv.tv_sec, + (long)tv.tv_usec); + } + if (siocgstampns) { + if (ioctl(sock, SIOCGSTAMPNS, &ts)) + printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); + else + printf("SIOCGSTAMPNS %ld.%09ld\n", + (long)ts.tv_sec, + (long)ts.tv_nsec); + } +} + +static void recvpacket(int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + char data[256]; + struct msghdr msg; + struct iovec entry; + struct sockaddr_in from_addr; + struct { + struct cmsghdr cm; + char control[512]; + } control; + int res; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + entry.iov_base = data; + entry.iov_len = sizeof(data); + msg.msg_name = (caddr_t)&from_addr; + msg.msg_namelen = sizeof(from_addr); + msg.msg_control = &control; + msg.msg_controllen = sizeof(control); + + res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); + if (res < 0) { + printf("%s %s: %s\n", + "recvmsg", + (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular", + strerror(errno)); + } else { + printpacket(&msg, res, data, + sock, recvmsg_flags, + siocgstamp, siocgstampns); + } +} + +int main(int argc, char **argv) +{ + int so_timestamping_flags = 0; + int so_timestamp = 0; + int so_timestampns = 0; + int siocgstamp = 0; + int siocgstampns = 0; + int ip_multicast_loop = 0; + char *interface; + int i; + int enabled = 1; + int sock; + struct ifreq device; + struct ifreq hwtstamp; + struct hwtstamp_config hwconfig, hwconfig_requested; + struct sockaddr_in addr; + struct ip_mreq imr; + struct in_addr iaddr; + int val; + socklen_t len; + struct timeval next; + + if (argc < 2) + usage(0); + interface = argv[1]; + + for (i = 2; i < argc; i++) { + if (!strcasecmp(argv[i], "SO_TIMESTAMP")) + so_timestamp = 1; + else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) + so_timestampns = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMP")) + siocgstamp = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) + siocgstampns = 1; + else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) + ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SYS_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SYS_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else + usage(argv[i]); + } + + sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (socket < 0) + bail("socket"); + + memset(&device, 0, sizeof(device)); + strncpy(device.ifr_name, interface, sizeof(device.ifr_name)); + if (ioctl(sock, SIOCGIFADDR, &device) < 0) + bail("getting interface IP address"); + + memset(&hwtstamp, 0, sizeof(hwtstamp)); + strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); + hwtstamp.ifr_data = (void *)&hwconfig; + memset(&hwconfig, 0, sizeof(&hwconfig)); + hwconfig.tx_type = + (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + hwconfig.rx_filter = + (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; + hwconfig_requested = hwconfig; + if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { + if ((errno == EINVAL || errno == ENOTSUP) && + hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && + hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) + printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); + else + bail("SIOCSHWTSTAMP"); + } + printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", + hwconfig_requested.tx_type, hwconfig.tx_type, + hwconfig_requested.rx_filter, hwconfig.rx_filter); + + /* bind to PTP port */ + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(319 /* PTP event port */); + if (bind(sock, + (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)) < 0) + bail("bind"); + + /* set multicast group for outgoing packets */ + inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ + addr.sin_addr = iaddr; + imr.imr_multiaddr.s_addr = iaddr.s_addr; + imr.imr_interface.s_addr = + ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, + &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) + bail("set multicast"); + + /* join multicast group, loop our own packet */ + if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)) < 0) + bail("join multicast group"); + + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, + &ip_multicast_loop, sizeof(enabled)) < 0) { + bail("loop multicast"); + } + + /* set socket options for time stamping */ + if (so_timestamp && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMP"); + + if (so_timestampns && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMPNS"); + + if (so_timestamping_flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, + &so_timestamping_flags, + sizeof(so_timestamping_flags)) < 0) + bail("setsockopt SO_TIMESTAMPING"); + + /* request IP_PKTINFO for debugging purposes */ + if (setsockopt(sock, SOL_IP, IP_PKTINFO, + &enabled, sizeof(enabled)) < 0) + printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); + + /* verify socket options */ + len = sizeof(val); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); + else + printf("SO_TIMESTAMP %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", + strerror(errno)); + else + printf("SO_TIMESTAMPNS %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", + strerror(errno)); + } else { + printf("SO_TIMESTAMPING %d\n", val); + if (val != so_timestamping_flags) + printf(" not the expected value %d\n", + so_timestamping_flags); + } + + /* send packets forever every five seconds */ + gettimeofday(&next, 0); + next.tv_sec = (next.tv_sec + 1) / 5 * 5; + next.tv_usec = 0; + while (1) { + struct timeval now; + struct timeval delta; + long delta_us; + int res; + fd_set readfs, errorfs; + + gettimeofday(&now, 0); + delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + + (long)(next.tv_usec - now.tv_usec); + if (delta_us > 0) { + /* continue waiting for timeout or data */ + delta.tv_sec = delta_us / 1000000; + delta.tv_usec = delta_us % 1000000; + + FD_ZERO(&readfs); + FD_ZERO(&errorfs); + FD_SET(sock, &readfs); + FD_SET(sock, &errorfs); + printf("%ld.%06ld: select %ldus\n", + (long)now.tv_sec, (long)now.tv_usec, + delta_us); + res = select(sock + 1, &readfs, 0, &errorfs, &delta); + gettimeofday(&now, 0); + printf("%ld.%06ld: select returned: %d, %s\n", + (long)now.tv_sec, (long)now.tv_usec, + res, + res < 0 ? strerror(errno) : "success"); + if (res > 0) { + if (FD_ISSET(sock, &readfs)) + printf("ready for reading\n"); + if (FD_ISSET(sock, &errorfs)) + printf("has error\n"); + recvpacket(sock, 0, + siocgstamp, + siocgstampns); + recvpacket(sock, MSG_ERRQUEUE, + siocgstamp, + siocgstampns); + } + } else { + /* write one packet */ + sendpacket(sock, + (struct sockaddr *)&addr, + sizeof(addr)); + next.tv_sec += 5; + continue; + } + } + + return 0; +} diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index a1057c2d95e..3641ec1452f 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -62,6 +62,9 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 6817be9573a..537de4e0ef5 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 35863f26092..04c86061970 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/blackfin/include/asm/socket.h b/arch/blackfin/include/asm/socket.h index 2ca702e44d4..fac7fe9e1f8 100644 --- a/arch/blackfin/include/asm/socket.h +++ b/arch/blackfin/include/asm/socket.h @@ -53,4 +53,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 9df0ca82f5d..d5cf7400540 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -56,6 +56,9 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index da2520dbf25..602518a70a1 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index d5ef0aa3e31..745421225ec 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -63,4 +63,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index dbc64e92c41..ca87f938b03 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index facc2d7a87c..2abca178016 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -75,6 +75,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index fba402c95ac..885472bf7b7 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -54,6 +54,9 @@ #define SO_MARK 0x401f +#define SO_TIMESTAMPING 0x4020 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index f5a4e168e49..1e5cfad0e3f 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -61,4 +61,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index c786ab623b2..02330c50241 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -62,4 +62,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h index 6d4bf651295..345653b9682 100644 --- a/arch/sh/include/asm/socket.h +++ b/arch/sh/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* __ASM_SH_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index bf50d0c2d58..982a12f959f 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -50,6 +50,9 @@ #define SO_MARK 0x0022 +#define SO_TIMESTAMPING 0x0023 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index 8ab9cc8b2ec..ca8bf2cd0ba 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index 6100682b1da..dd1a7a4a1ce 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -65,4 +65,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h index e51ca67b935..57c3d4054e8 100644 --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h @@ -54,5 +54,8 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h index 9a0e2001222..be7ed589af5 100644 --- a/include/asm-m32r/socket.h +++ b/include/asm-m32r/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/include/asm-mn10300/socket.h b/include/asm-mn10300/socket.h index 80af9c4ccad..fb5daf438ec 100644 --- a/include/asm-mn10300/socket.h +++ b/include/asm-mn10300/socket.h @@ -54,4 +54,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index ceb1454b697..ec12cc74366 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -18,6 +18,7 @@ struct sock_extended_err #define SO_EE_ORIGIN_LOCAL 1 #define SO_EE_ORIGIN_ICMP 2 #define SO_EE_ORIGIN_ICMP6 3 +#define SO_EE_ORIGIN_TIMESTAMPING 4 #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h new file mode 100644 index 00000000000..a3b8546354a --- /dev/null +++ b/include/linux/net_tstamp.h @@ -0,0 +1,104 @@ +/* + * Userspace API for hardware time stamping of network packets + * + * Copyright (C) 2008,2009 Intel Corporation + * Author: Patrick Ohly + * + */ + +#ifndef _NET_TIMESTAMPING_H +#define _NET_TIMESTAMPING_H + +#include /* for SO_TIMESTAMPING */ + +/* SO_TIMESTAMPING gets an integer bit field comprised of these values */ +enum { + SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), + SOF_TIMESTAMPING_TX_SOFTWARE = (1<<1), + SOF_TIMESTAMPING_RX_HARDWARE = (1<<2), + SOF_TIMESTAMPING_RX_SOFTWARE = (1<<3), + SOF_TIMESTAMPING_SOFTWARE = (1<<4), + SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5), + SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6), + SOF_TIMESTAMPING_MASK = + (SOF_TIMESTAMPING_RAW_HARDWARE - 1) | + SOF_TIMESTAMPING_RAW_HARDWARE +}; + +/** + * struct hwtstamp_config - %SIOCSHWTSTAMP parameter + * + * @flags: no flags defined right now, must be zero + * @tx_type: one of HWTSTAMP_TX_* + * @rx_type: one of one of HWTSTAMP_FILTER_* + * + * %SIOCSHWTSTAMP expects a &struct ifreq with a ifr_data pointer to + * this structure. dev_ifsioc() in the kernel takes care of the + * translation between 32 bit userspace and 64 bit kernel. The + * structure is intentionally chosen so that it has the same layout on + * 32 and 64 bit systems, don't break this! + */ +struct hwtstamp_config { + int flags; + int tx_type; + int rx_filter; +}; + +/* possible values for hwtstamp_config->tx_type */ +enum { + /* + * No outgoing packet will need hardware time stamping; + * should a packet arrive which asks for it, no hardware + * time stamping will be done. + */ + HWTSTAMP_TX_OFF, + + /* + * Enables hardware time stamping for outgoing packets; + * the sender of the packet decides which are to be + * time stamped by setting %SOF_TIMESTAMPING_TX_SOFTWARE + * before sending the packet. + */ + HWTSTAMP_TX_ON, +}; + +/* possible values for hwtstamp_config->rx_filter */ +enum { + /* time stamp no incoming packet at all */ + HWTSTAMP_FILTER_NONE, + + /* time stamp any incoming packet */ + HWTSTAMP_FILTER_ALL, + + /* return value: time stamp all packets requested plus some others */ + HWTSTAMP_FILTER_SOME, + + /* PTP v1, UDP, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V1_L4_EVENT, + /* PTP v1, UDP, Sync packet */ + HWTSTAMP_FILTER_PTP_V1_L4_SYNC, + /* PTP v1, UDP, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ, + /* PTP v2, UDP, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V2_L4_EVENT, + /* PTP v2, UDP, Sync packet */ + HWTSTAMP_FILTER_PTP_V2_L4_SYNC, + /* PTP v2, UDP, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ, + + /* 802.AS1, Ethernet, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V2_L2_EVENT, + /* 802.AS1, Ethernet, Sync packet */ + HWTSTAMP_FILTER_PTP_V2_L2_SYNC, + /* 802.AS1, Ethernet, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ, + + /* PTP v2/802.AS1, any layer, any kind of event packet */ + HWTSTAMP_FILTER_PTP_V2_EVENT, + /* PTP v2/802.AS1, any layer, Sync packet */ + HWTSTAMP_FILTER_PTP_V2_SYNC, + /* PTP v2/802.AS1, any layer, Delay_req packet */ + HWTSTAMP_FILTER_PTP_V2_DELAY_REQ, +}; + +#endif /* _NET_TIMESTAMPING_H */ diff --git a/include/linux/sockios.h b/include/linux/sockios.h index abef7596655..241f179347d 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -122,6 +122,9 @@ #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ +/* hardware time stamping: parameters in linux/net_tstamp.h */ +#define SIOCSHWTSTAMP 0x89b0 + /* Device private ioctl calls */ /* -- cgit v1.2.3-70-g09d2 From a0abd520fd69295f4a3735e29a9448a32e101d47 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 16 Feb 2009 17:31:58 -0600 Subject: cpumask: fix powernow-k8: partial revert of 2fdf66b491ac706657946442789ec644cc317e1a Impact: fix powernow-k8 when acpi=off (or other error). There was a spurious change introduced into powernow-k8 in this patch: so that we try to "restore" the cpus_allowed we never saved. We revert that file. See lkml "[PATCH] x86/powernow: fix cpus_allowed brokage when acpi=off" from Yinghai for the bug report. Cc: Mike Travis Cc: Yinghai Lu Signed-off-by: Rusty Russell Acked-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index fb039cd345d..6428aa17b40 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - rc = powernow_k8_cpu_init_acpi(data); - if (rc) { + if (powernow_k8_cpu_init_acpi(data)) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - goto err_out; + kfree(data); + return -ENODEV; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - goto err_out; + kfree(data); + return -ENODEV; } rc = find_psb_table(data); if (rc) { - goto err_out; + kfree(data); + return -ENODEV; } /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ -- cgit v1.2.3-70-g09d2 From bf51935f3e988e0ed6f34b55593e5912f990750a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 17 Feb 2009 06:01:30 -0800 Subject: x86, rcu: fix strange load average and ksoftirqd behavior Damien Wyart reported high ksoftirqd CPU usage (20%) on an otherwise idle system. The function-graph trace Damien provided: > 799.521187 | 1) -0 | | rcu_check_callbacks() { > 799.521371 | 1) -0 | | rcu_check_callbacks() { > 799.521555 | 1) -0 | | rcu_check_callbacks() { > 799.521738 | 1) -0 | | rcu_check_callbacks() { > 799.521934 | 1) -0 | | rcu_check_callbacks() { > 799.522068 | 1) ksoftir-2324 | | rcu_check_callbacks() { > 799.522208 | 1) -0 | | rcu_check_callbacks() { > 799.522392 | 1) -0 | | rcu_check_callbacks() { > 799.522575 | 1) -0 | | rcu_check_callbacks() { > 799.522759 | 1) -0 | | rcu_check_callbacks() { > 799.522956 | 1) -0 | | rcu_check_callbacks() { > 799.523074 | 1) ksoftir-2324 | | rcu_check_callbacks() { > 799.523214 | 1) -0 | | rcu_check_callbacks() { > 799.523397 | 1) -0 | | rcu_check_callbacks() { > 799.523579 | 1) -0 | | rcu_check_callbacks() { > 799.523762 | 1) -0 | | rcu_check_callbacks() { > 799.523960 | 1) -0 | | rcu_check_callbacks() { > 799.524079 | 1) ksoftir-2324 | | rcu_check_callbacks() { > 799.524220 | 1) -0 | | rcu_check_callbacks() { > 799.524403 | 1) -0 | | rcu_check_callbacks() { > 799.524587 | 1) -0 | | rcu_check_callbacks() { > 799.524770 | 1) -0 | | rcu_check_callbacks() { > [ . . . ] Shows rcu_check_callbacks() being invoked way too often. It should be called once per jiffy, and here it is called no less than 22 times in about 3.5 milliseconds, meaning one call every 160 microseconds or so. Why do we need to call rcu_pending() and rcu_check_callbacks() from the idle loop of 32-bit x86, especially given that no other architecture does this? The following patch removes the call to rcu_pending() and rcu_check_callbacks() from the x86 32-bit idle loop in order to reduce the softirq load on idle systems. Reported-by: Damien Wyart Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b..bd4da2af08a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -104,9 +104,6 @@ void cpu_idle(void) check_pgt_cache(); rmb(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, 0); - if (cpu_is_offline(cpu)) play_dead(); -- cgit v1.2.3-70-g09d2 From 6ec68bff3c81e776a455f6aca95c8c5f1d630198 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:39:26 +0100 Subject: x86, mce: reinitialize per cpu features on resume Impact: Bug fix This fixes a long standing bug in the machine check code. On resume the boot CPU wouldn't get its vendor specific state like thermal handling reinitialized. This means the boot cpu wouldn't ever get any thermal events reported again. Call the respective initialization functions on resume v2: Remove ancient init because they don't have a resume device anyways. Pointed out by Thomas Gleixner. v3: Now fix the Subject too to reflect v2 change Signed-off-by: Andi Kleen Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1c838032fd3..1f184efb6bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable); static int mce_resume(struct sys_device *dev) { mce_init(NULL); + mce_cpu_features(¤t_cpu_data); return 0; } -- cgit v1.2.3-70-g09d2 From 380851bc6b1b4107c61dfa2997f9095dcf779336 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:39:33 +0100 Subject: x86, mce: use force_sig_info to kill process in machine check Impact: bug fix (with tolerant == 3) do_exit cannot be called directly from the exception handler because it can sleep and the exception handler runs on the exception stack. Use force_sig() instead. Based on a earlier patch by Ying Huang who debugged the problem. Signed-off-by: Andi Kleen Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1f184efb6bc..25cf624eccb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code) * If we know that the error was in user space, send a * SIGBUS. Otherwise, panic if tolerance is low. * - * do_exit() takes an awful lot of locks and has a slight + * force_sig() takes an awful lot of locks and has a slight * risk of deadlocking. */ if (user_space) { - do_exit(SIGBUS); + force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { mce_panic("Uncorrected machine check", &panicm, mcestart); -- cgit v1.2.3-70-g09d2 From 07db1c140eb233971341396e492cc73d4280e698 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:39:35 +0100 Subject: x86, mce: fix ifdef for 64bit thermal apic vector clear on shutdown Impact: Bugfix The ifdef for the apic clear on shutdown for the 64bit intel thermal vector was incorrect and never triggered. Fix that. Signed-off-by: Andi Kleen Acked-by: Thomas Gleixner Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 115449f869e..570f36e44e5 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -862,7 +862,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); -- cgit v1.2.3-70-g09d2 From 109568e110ed67d4be1b28609b9fa00fca97f8eb Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 9 Jan 2009 16:49:30 +1100 Subject: crypto: aes - Move key_length in struct crypto_aes_ctx to be the last field The Intel AES-NI AES acceleration instructions need key_enc, key_dec in struct crypto_aes_ctx to be 16 byte aligned, it make this easier to move key_length to be the last one. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-i586-asm_32.S | 6 +++--- arch/x86/crypto/aes-x86_64-asm_64.S | 4 ++-- include/crypto/aes.h | 6 +++++- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index e41b147f450..5252c388017 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -46,9 +46,9 @@ #define in_blk 16 /* offsets in crypto_tfm structure */ -#define klen (crypto_tfm_ctx_offset + 0) -#define ekey (crypto_tfm_ctx_offset + 4) -#define dkey (crypto_tfm_ctx_offset + 244) +#define klen (crypto_tfm_ctx_offset + 480) +#define ekey (crypto_tfm_ctx_offset + 0) +#define dkey (crypto_tfm_ctx_offset + 240) // register mapping for encrypt and decrypt subroutines diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index a120f526c3d..7f28f62737d 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -56,13 +56,13 @@ .align 8; \ FUNC: movq r1,r2; \ movq r3,r4; \ - leaq BASE+KEY+48+4(r8),r9; \ + leaq BASE+KEY+48(r8),r9; \ movq r10,r11; \ movl (r7),r5 ## E; \ movl 4(r7),r1 ## E; \ movl 8(r7),r6 ## E; \ movl 12(r7),r7 ## E; \ - movl BASE+0(r8),r10 ## E; \ + movl BASE+480(r8),r10 ## E; \ xorl -48(r9),r5 ## E; \ xorl -44(r9),r1 ## E; \ xorl -40(r9),r6 ## E; \ diff --git a/include/crypto/aes.h b/include/crypto/aes.h index 656a4c66a56..7524ba3b6f3 100644 --- a/include/crypto/aes.h +++ b/include/crypto/aes.h @@ -17,10 +17,14 @@ #define AES_MAX_KEYLENGTH (15 * 16) #define AES_MAX_KEYLENGTH_U32 (AES_MAX_KEYLENGTH / sizeof(u32)) +/* + * Please ensure that the first two fields are 16-byte aligned + * relative to the start of the structure, i.e., don't move them! + */ struct crypto_aes_ctx { - u32 key_length; u32 key_enc[AES_MAX_KEYLENGTH_U32]; u32 key_dec[AES_MAX_KEYLENGTH_U32]; + u32 key_length; }; extern const u32 crypto_ft_tab[4][256]; -- cgit v1.2.3-70-g09d2 From 07bf44f86989f5ed866510374fe761d1903681fb Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 9 Jan 2009 17:25:50 +1100 Subject: crypto: aes - Export x86 AES encrypt/decrypt functions Intel AES-NI AES acceleration instructions touch XMM state, to use that in soft_irq context, general x86 AES implementation is used as fallback. The first parameter is changed from struct crypto_tfm * to struct crypto_aes_ctx * to make it easier to deal with 16 bytes alignment requirement of AES-NI implementation. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-i586-asm_32.S | 18 +++++++++--------- arch/x86/crypto/aes-x86_64-asm_64.S | 6 ++---- arch/x86/crypto/aes_glue.c | 20 ++++++++++++++++---- arch/x86/include/asm/aes.h | 11 +++++++++++ 4 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/aes.h (limited to 'arch/x86') diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index 5252c388017..b949ec2f9af 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -41,14 +41,14 @@ #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) /* offsets to parameters with one register pushed onto stack */ -#define tfm 8 +#define ctx 8 #define out_blk 12 #define in_blk 16 -/* offsets in crypto_tfm structure */ -#define klen (crypto_tfm_ctx_offset + 480) -#define ekey (crypto_tfm_ctx_offset + 0) -#define dkey (crypto_tfm_ctx_offset + 240) +/* offsets in crypto_aes_ctx structure */ +#define klen (480) +#define ekey (0) +#define dkey (240) // register mapping for encrypt and decrypt subroutines @@ -217,7 +217,7 @@ do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ // AES (Rijndael) Encryption Subroutine -/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ +/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ .global aes_enc_blk @@ -228,7 +228,7 @@ aes_enc_blk: push %ebp - mov tfm(%esp),%ebp + mov ctx(%esp),%ebp // CAUTION: the order and the values used in these assigns // rely on the register mappings @@ -292,7 +292,7 @@ aes_enc_blk: ret // AES (Rijndael) Decryption Subroutine -/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ +/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ .global aes_dec_blk @@ -303,7 +303,7 @@ aes_enc_blk: aes_dec_blk: push %ebp - mov tfm(%esp),%ebp + mov ctx(%esp),%ebp // CAUTION: the order and the values used in these assigns // rely on the register mappings diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 7f28f62737d..5b577d5a059 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -17,8 +17,6 @@ #include -#define BASE crypto_tfm_ctx_offset - #define R1 %rax #define R1E %eax #define R1X %ax @@ -56,13 +54,13 @@ .align 8; \ FUNC: movq r1,r2; \ movq r3,r4; \ - leaq BASE+KEY+48(r8),r9; \ + leaq KEY+48(r8),r9; \ movq r10,r11; \ movl (r7),r5 ## E; \ movl 4(r7),r1 ## E; \ movl 8(r7),r6 ## E; \ movl 12(r7),r7 ## E; \ - movl BASE+480(r8),r10 ## E; \ + movl 480(r8),r10 ## E; \ xorl -48(r9),r5 ## E; \ xorl -44(r9),r1 ## E; \ xorl -40(r9),r6 ## E; \ diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 71f45782711..49ae9fe32b2 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -5,17 +5,29 @@ #include -asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); -asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); +asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); +asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); + +void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) +{ + aes_enc_blk(ctx, dst, src); +} +EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86); + +void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) +{ + aes_dec_blk(ctx, dst, src); +} +EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86); static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - aes_enc_blk(tfm, dst, src); + aes_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - aes_dec_blk(tfm, dst, src); + aes_dec_blk(crypto_tfm_ctx(tfm), dst, src); } static struct crypto_alg aes_alg = { diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h new file mode 100644 index 00000000000..80545a1cbe3 --- /dev/null +++ b/arch/x86/include/asm/aes.h @@ -0,0 +1,11 @@ +#ifndef ASM_X86_AES_H +#define ASM_X86_AES_H + +#include +#include + +void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +#endif -- cgit v1.2.3-70-g09d2 From 54b6a1bd5364aca95cd6ffae00f2b64c6511122c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 18 Jan 2009 16:28:34 +1100 Subject: crypto: aes-ni - Add support to Intel AES-NI instructions for x86_64 platform Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) instructions that are going to be introduced in the next generation of Intel processor, as of 2009. These instructions enable fast and secure data encryption and decryption, using the Advanced Encryption Standard (AES), defined by FIPS Publication number 197. The architecture introduces six instructions that offer full hardware support for AES. Four of them support high performance data encryption and decryption, and the other two instructions support the AES key expansion procedure. The white paper can be downloaded from: http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf AES may be used in soft_irq context, but MMX/SSE context can not be touched safely in soft_irq context. So in_interrupt() is checked, if in IRQ or soft_irq context, the general x86_64 implementation are used instead. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/aesni-intel_asm.S | 896 +++++++++++++++++++++++++++++++++++++ arch/x86/crypto/aesni-intel_glue.c | 461 +++++++++++++++++++ arch/x86/include/asm/cpufeature.h | 1 + crypto/Kconfig | 25 ++ 5 files changed, 1386 insertions(+) create mode 100644 arch/x86/crypto/aesni-intel_asm.S create mode 100644 arch/x86/crypto/aesni-intel_glue.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 903de4aa509..ebe7deedd5b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o +obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o @@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o + +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S new file mode 100644 index 00000000000..caba9960170 --- /dev/null +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -0,0 +1,896 @@ +/* + * Implement AES algorithm in Intel AES-NI instructions. + * + * The white paper of AES-NI instructions can be downloaded from: + * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Kahraman Akdemir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +.text + +#define STATE1 %xmm0 +#define STATE2 %xmm4 +#define STATE3 %xmm5 +#define STATE4 %xmm6 +#define STATE STATE1 +#define IN1 %xmm1 +#define IN2 %xmm7 +#define IN3 %xmm8 +#define IN4 %xmm9 +#define IN IN1 +#define KEY %xmm2 +#define IV %xmm3 + +#define KEYP %rdi +#define OUTP %rsi +#define INP %rdx +#define LEN %rcx +#define IVP %r8 +#define KLEN %r9d +#define T1 %r10 +#define TKEYP T1 +#define T2 %r11 + +_key_expansion_128: +_key_expansion_256a: + pshufd $0b11111111, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + movaps %xmm0, (%rcx) + add $0x10, %rcx + ret + +_key_expansion_192a: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + movaps %xmm2, %xmm6 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, %xmm1 + shufps $0b01000100, %xmm0, %xmm6 + movaps %xmm6, (%rcx) + shufps $0b01001110, %xmm2, %xmm1 + movaps %xmm1, 16(%rcx) + add $0x20, %rcx + ret + +_key_expansion_192b: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, (%rcx) + add $0x10, %rcx + ret + +_key_expansion_256b: + pshufd $0b10101010, %xmm1, %xmm1 + shufps $0b00010000, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + shufps $0b10001100, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + pxor %xmm1, %xmm2 + movaps %xmm2, (%rcx) + add $0x10, %rcx + ret + +/* + * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + * unsigned int key_len) + */ +ENTRY(aesni_set_key) + movups (%rsi), %xmm0 # user key (first 16 bytes) + movaps %xmm0, (%rdi) + lea 0x10(%rdi), %rcx # key addr + movl %edx, 480(%rdi) + pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x + cmp $24, %dl + jb .Lenc_key128 + je .Lenc_key192 + movups 0x10(%rsi), %xmm2 # other user key + movaps %xmm2, (%rcx) + add $0x10, %rcx + # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + call _key_expansion_256a + # aeskeygenassist $0x1, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + call _key_expansion_256b + # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + call _key_expansion_256a + # aeskeygenassist $0x2, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + call _key_expansion_256b + # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + call _key_expansion_256a + # aeskeygenassist $0x4, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + call _key_expansion_256b + # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + call _key_expansion_256a + # aeskeygenassist $0x8, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + call _key_expansion_256b + # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + call _key_expansion_256a + # aeskeygenassist $0x10, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + call _key_expansion_256b + # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + call _key_expansion_256a + # aeskeygenassist $0x20, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + call _key_expansion_256b + # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + call _key_expansion_256a + jmp .Ldec_key +.Lenc_key192: + movq 0x10(%rsi), %xmm2 # other user key + # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + call _key_expansion_192a + # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + call _key_expansion_192b + # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + call _key_expansion_192a + # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + call _key_expansion_192b + # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + call _key_expansion_192a + # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + call _key_expansion_192b + # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + call _key_expansion_192a + # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 + call _key_expansion_192b + jmp .Ldec_key +.Lenc_key128: + # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + call _key_expansion_128 + # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + call _key_expansion_128 + # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + call _key_expansion_128 + # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + call _key_expansion_128 + # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + call _key_expansion_128 + # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + call _key_expansion_128 + # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 + call _key_expansion_128 + # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 + call _key_expansion_128 + # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b + call _key_expansion_128 + # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 + call _key_expansion_128 +.Ldec_key: + sub $0x10, %rcx + movaps (%rdi), %xmm0 + movaps (%rcx), %xmm1 + movaps %xmm0, 240(%rcx) + movaps %xmm1, 240(%rdi) + add $0x10, %rdi + lea 240-16(%rcx), %rsi +.align 4 +.Ldec_key_loop: + movaps (%rdi), %xmm0 + # aesimc %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 + movaps %xmm1, (%rsi) + add $0x10, %rdi + sub $0x10, %rsi + cmp %rcx, %rdi + jb .Ldec_key_loop + xor %rax, %rax + ret + +/* + * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + */ +ENTRY(aesni_enc) + movl 480(KEYP), KLEN # key length + movups (INP), STATE # input + call _aesni_enc1 + movups STATE, (OUTP) # output + ret + +/* + * _aesni_enc1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_enc1: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Lenc128 + lea 0x20(TKEYP), TKEYP + je .Lenc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps -0x50(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 +.align 4 +.Lenc192: + movaps -0x40(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps -0x30(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 +.align 4 +.Lenc128: + movaps -0x20(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps -0x10(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps (TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x10(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x20(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x30(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x40(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x50(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x60(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x70(TKEYP), KEY + # aesenclast KEY, STATE # last round + .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + ret + +/* + * _aesni_enc4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_enc4: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4enc128 + lea 0x20(TKEYP), TKEYP + je .L4enc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps -0x50(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 +#.align 4 +.L4enc192: + movaps -0x40(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps -0x30(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 +#.align 4 +.L4enc128: + movaps -0x20(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps -0x10(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps (TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x10(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x20(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x30(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x40(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x50(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x60(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x70(TKEYP), KEY + # aesenclast KEY, STATE1 # last round + .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + # aesenclast KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 + # aesenclast KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdd, 0xea + # aesenclast KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 + ret + +/* + * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + */ +ENTRY(aesni_dec) + mov 480(KEYP), KLEN # key length + add $240, KEYP + movups (INP), STATE # input + call _aesni_dec1 + movups STATE, (OUTP) #output + ret + +/* + * _aesni_dec1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_dec1: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Ldec128 + lea 0x20(TKEYP), TKEYP + je .Ldec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps -0x50(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 +.align 4 +.Ldec192: + movaps -0x40(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps -0x30(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 +.align 4 +.Ldec128: + movaps -0x20(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps -0x10(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps (TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x10(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x20(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x30(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x40(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x50(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x60(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x70(TKEYP), KEY + # aesdeclast KEY, STATE # last round + .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + ret + +/* + * _aesni_dec4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_dec4: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4dec128 + lea 0x20(TKEYP), TKEYP + je .L4dec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps -0x50(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 +.align 4 +.L4dec192: + movaps -0x40(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps -0x30(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 +.align 4 +.L4dec128: + movaps -0x20(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps -0x10(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps (TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x10(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x20(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x30(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x40(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x50(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x60(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x70(TKEYP), KEY + # aesdeclast KEY, STATE1 # last round + .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + # aesdeclast KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 + # aesdeclast KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdf, 0xea + # aesdeclast KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 + ret + +/* + * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len) + */ +ENTRY(aesni_ecb_enc) + test LEN, LEN # check length + jz .Lecb_enc_ret + mov 480(KEYP), KLEN + cmp $16, LEN + jb .Lecb_enc_ret + cmp $64, LEN + jb .Lecb_enc_loop1 +.align 4 +.Lecb_enc_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_enc4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_enc_loop4 + cmp $16, LEN + jb .Lecb_enc_ret +.align 4 +.Lecb_enc_loop1: + movups (INP), STATE1 + call _aesni_enc1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_enc_loop1 +.Lecb_enc_ret: + ret + +/* + * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len); + */ +ENTRY(aesni_ecb_dec) + test LEN, LEN + jz .Lecb_dec_ret + mov 480(KEYP), KLEN + add $240, KEYP + cmp $16, LEN + jb .Lecb_dec_ret + cmp $64, LEN + jb .Lecb_dec_loop1 +.align 4 +.Lecb_dec_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_dec4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_dec_loop4 + cmp $16, LEN + jb .Lecb_dec_ret +.align 4 +.Lecb_dec_loop1: + movups (INP), STATE1 + call _aesni_dec1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_dec_loop1 +.Lecb_dec_ret: + ret + +/* + * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_cbc_enc) + cmp $16, LEN + jb .Lcbc_enc_ret + mov 480(KEYP), KLEN + movups (IVP), STATE # load iv as initial state +.align 4 +.Lcbc_enc_loop: + movups (INP), IN # load input + pxor IN, STATE + call _aesni_enc1 + movups STATE, (OUTP) # store output + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lcbc_enc_loop + movups STATE, (IVP) +.Lcbc_enc_ret: + ret + +/* + * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_cbc_dec) + cmp $16, LEN + jb .Lcbc_dec_ret + mov 480(KEYP), KLEN + add $240, KEYP + movups (IVP), IV + cmp $64, LEN + jb .Lcbc_dec_loop1 +.align 4 +.Lcbc_dec_loop4: + movups (INP), IN1 + movaps IN1, STATE1 + movups 0x10(INP), IN2 + movaps IN2, STATE2 + movups 0x20(INP), IN3 + movaps IN3, STATE3 + movups 0x30(INP), IN4 + movaps IN4, STATE4 + call _aesni_dec4 + pxor IV, STATE1 + pxor IN1, STATE2 + pxor IN2, STATE3 + pxor IN3, STATE4 + movaps IN4, IV + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lcbc_dec_loop4 + cmp $16, LEN + jb .Lcbc_dec_ret +.align 4 +.Lcbc_dec_loop1: + movups (INP), IN + movaps IN, STATE + call _aesni_dec1 + pxor IV, STATE + movups STATE, (OUTP) + movaps IN, IV + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lcbc_dec_loop1 + movups IV, (IVP) +.Lcbc_dec_ret: + ret diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c new file mode 100644 index 00000000000..02af0af6549 --- /dev/null +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -0,0 +1,461 @@ +/* + * Support for Intel AES-NI instructions. This file contains glue + * code, the real AES implementation is in intel-aes_asm.S. + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct async_aes_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +#define AESNI_ALIGN 16 +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) + +asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); +asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in); +asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in); +asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +static inline int kernel_fpu_using(void) +{ + if (in_interrupt() && !(read_cr0() & X86_CR0_TS)) + return 1; + return 0; +} + +static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) +{ + unsigned long addr = (unsigned long)raw_ctx; + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (struct crypto_aes_ctx *)ALIGN(addr, align); +} + +static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, + const u8 *in_key, unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); + u32 *flags = &tfm->crt_flags; + int err; + + if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + if (kernel_fpu_using()) + err = crypto_aes_expand_key(ctx, in_key, key_len); + else { + kernel_fpu_begin(); + err = aesni_set_key(ctx, in_key, key_len); + kernel_fpu_end(); + } + + return err; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (kernel_fpu_using()) + crypto_aes_encrypt_x86(ctx, dst, src); + else { + kernel_fpu_begin(); + aesni_enc(ctx, dst, src); + kernel_fpu_end(); + } +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (kernel_fpu_using()) + crypto_aes_decrypt_x86(ctx, dst, src); + else { + kernel_fpu_begin(); + aesni_dec(ctx, dst, src); + kernel_fpu_end(); + } +} + +static struct crypto_alg aesni_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-aesni", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "__ecb-aes-aesni", + .cra_driver_name = "__driver-ecb-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "__cbc-aes-aesni", + .cra_driver_name = "__driver-cbc-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (kernel_fpu_using()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (kernel_fpu_using()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static void ablk_init_common(struct crypto_tfm *tfm, + struct cryptd_ablkcipher *cryptd_tfm) +{ + struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); +} + +static int ablk_ecb_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_ecb_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), + .cra_init = ablk_ecb_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; + +static int ablk_cbc_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_cbc_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), + .cra_init = ablk_cbc_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; + +static int __init aesni_init(void) +{ + int err; + + if (!cpu_has_aes) { + printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); + return -ENODEV; + } + if ((err = crypto_register_alg(&aesni_alg))) + goto aes_err; + if ((err = crypto_register_alg(&blk_ecb_alg))) + goto blk_ecb_err; + if ((err = crypto_register_alg(&blk_cbc_alg))) + goto blk_cbc_err; + if ((err = crypto_register_alg(&ablk_ecb_alg))) + goto ablk_ecb_err; + if ((err = crypto_register_alg(&ablk_cbc_alg))) + goto ablk_cbc_err; + + return err; + +ablk_cbc_err: + crypto_unregister_alg(&ablk_ecb_alg); +ablk_ecb_err: + crypto_unregister_alg(&blk_cbc_alg); +blk_cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +blk_ecb_err: + crypto_unregister_alg(&aesni_alg); +aes_err: + return err; +} + +static void __exit aesni_exit(void) +{ + crypto_unregister_alg(&ablk_cbc_alg); + crypto_unregister_alg(&ablk_ecb_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&aesni_alg); +} + +module_init(aesni_init); +module_exit(aesni_exit); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("aes"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7301e60dc4a..0beba0d1468 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) #define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) +#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) diff --git a/crypto/Kconfig b/crypto/Kconfig index 8dde4fcf99c..a83ce0462b6 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -470,6 +470,31 @@ config CRYPTO_AES_X86_64 See for more information. +config CRYPTO_AES_NI_INTEL + tristate "AES cipher algorithms (AES-NI)" + depends on (X86 || UML_X86) && 64BIT + select CRYPTO_AES_X86_64 + select CRYPTO_CRYPTD + select CRYPTO_ALGAPI + help + Use Intel AES-NI instructions for AES algorithm. + + AES cipher algorithms (FIPS-197). AES uses the Rijndael + algorithm. + + Rijndael appears to be consistently a very good performer in + both hardware and software across a wide range of computing + environments regardless of its use in feedback or non-feedback + modes. Its key setup time is excellent, and its key agility is + good. Rijndael's very low memory requirements make it very well + suited for restricted-space environments, in which it also + demonstrates excellent performance. Rijndael's operations are + among the easiest to defend against power and timing attacks. + + The AES specifies three key sizes: 128, 192 and 256 bits + + See for more information. + config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" select CRYPTO_ALGAPI -- cgit v1.2.3-70-g09d2 From f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 18 Feb 2009 14:48:32 -0800 Subject: mm: clean up for early_pfn_to_nid() What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declaration of early_pfn_to_nid() is scattered over per-arch include files, and it seems it's complicated to know when the declaration is used. I think it makes fix-for-memmap-init not easy. This patch moves all declaration to include/linux/mm.h After this, if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use static definition in include/linux/mm.h else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use generic definition in mm/page_alloc.c else -> per-arch back end function will be called. Signed-off-by: KAMEZAWA Hiroyuki Tested-by: KOSAKI Motohiro Reported-by: David Miller Cc: Mel Gorman Cc: Heiko Carstens Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/mmzone.h | 4 ---- arch/ia64/mm/numa.c | 2 +- arch/x86/include/asm/mmzone_32.h | 2 -- arch/x86/include/asm/mmzone_64.h | 2 -- arch/x86/mm/numa_64.c | 2 +- include/linux/mm.h | 19 ++++++++++++++++--- mm/page_alloc.c | 8 +++++++- 7 files changed, 25 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h index 34efe88eb84..f2ca32069b3 100644 --- a/arch/ia64/include/asm/mmzone.h +++ b/arch/ia64/include/asm/mmzone.h @@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif - #ifdef CONFIG_IA64_DIG /* DIG systems are small */ # define MAX_PHYSNODE_ID 8 # define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index b73bf1838e5..5061c3fb679 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr) * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where * the section resides. */ -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca..105fb90a063 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) get_memcfg_numa_flat(); } -extern int early_pfn_to_nid(unsigned long pfn); - extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9..a29f48c2a32 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -extern int early_pfn_to_nid(unsigned long pfn); - #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89..f3516da035d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes, return shift; } -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { return phys_to_nid(pfn << PAGE_SHIFT); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 10074212a35..065cdf8c09f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid, typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); -#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + +#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ + !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) +static inline int __early_pfn_to_nid(unsigned long pfn) +{ + return 0; +} +#else +/* please see mm/page_alloc.c */ +extern int __meminit early_pfn_to_nid(unsigned long pfn); +#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +/* there is a per-arch backend function. */ +extern int __meminit __early_pfn_to_nid(unsigned long pfn); +#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +#endif + extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5675b307385..c5dd74602ef 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) * was used and there are no special requirements, this is a convenient * alternative */ -int __meminit early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i; @@ -3005,6 +3005,12 @@ int __meminit early_pfn_to_nid(unsigned long pfn) } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +int __meminit early_pfn_to_nid(unsigned long pfn) +{ + return __early_pfn_to_nid(pfn); +} + + /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ for (i = first_active_region_index_in_nid(nid); i != -1; \ -- cgit v1.2.3-70-g09d2 From 48ffc70b675aa7798a52a2e92e20f6cce9140b3d Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Wed, 18 Feb 2009 12:33:55 -0800 Subject: x86, vmi: TSC going backwards check in vmi clocksource Impact: fix time warps under vmware Similar to the check for TSC going backwards in the TSC clocksource, we also need this check for VMI clocksource. Signed-off-by: Alok N Kataria Cc: Zachary Amsden Signed-off-by: Ingo Molnar Cc: stable@kernel.org --- arch/x86/kernel/vmiclock_32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index c4c1f9e0940..bde106cae0a 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -283,10 +283,13 @@ void __devinit vmi_time_ap_init(void) #endif /** vmi clocksource */ +static struct clocksource clocksource_vmi; static cycle_t read_real_cycles(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + return ret >= clocksource_vmi.cycle_last ? + ret : clocksource_vmi.cycle_last; } static struct clocksource clocksource_vmi = { -- cgit v1.2.3-70-g09d2 From 07a66d7c53a538e1a9759954a82bb6c07365eff9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 08:04:13 +0100 Subject: x86: use the right protections for split-up pagetables Steven Rostedt found a bug in where in his modified kernel ftrace was unable to modify the kernel text, due to the PMD itself having been marked read-only as well in split_large_page(). The fix, suggested by Linus, is to not try to 'clone' the reference protection of a huge-page, but to use the standard (and permissive) page protection bits of KERNPG_TABLE. The 'cloning' makes sense for the ptes but it's a confused and incorrect concept at the page table level - because the pagetable entry is a set of all ptes and hence cannot 'clone' any single protection attribute - the ptes can be any mixture of protections. With the permissive KERNPG_TABLE, even if the pte protections get changed after this point (due to ftrace doing code-patching or other similar activities like kprobes), the resulting combined protections will still be correct and the pte's restrictive (or permissive) protections will control it. Also update the comment. This bug was there for a long time but has not caused visible problems before as it needs a rather large read-only area to trigger. Steve possibly hacked his kernel with some really large arrays or so. Anyway, the bug is definitely worth fixing. [ Huang Ying also experienced problems in this area when writing the EFI code, but the real bug in split_large_page() was not realized back then. ] Reported-by: Steven Rostedt Reported-by: Huang Ying Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8ca0d8566fc..7be47d1a97e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -508,18 +508,13 @@ static int split_large_page(pte_t *kpte, unsigned long address) #endif /* - * Install the new, split up pagetable. Important details here: + * Install the new, split up pagetable. * - * On Intel the NX bit of all levels must be cleared to make a - * page executable. See section 4.13.2 of Intel 64 and IA-32 - * Architectures Software Developer's Manual). - * - * Mark the entry present. The current mapping might be - * set to not present, which we preserved above. + * We use the standard kernel pagetable protections for the new + * pagetable protections, the actual ptes set above control the + * primary protection behavior: */ - ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); - pgprot_val(ref_prot) |= _PAGE_PRESENT; - __set_pmd_pte(kpte, address, mk_pte(base, ref_prot)); + __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); base = NULL; out_unlock: -- cgit v1.2.3-70-g09d2 From cc3ca22063784076bd240fda87217387a8f2ae92 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 20 Feb 2009 23:35:51 -0800 Subject: x86, mce: remove incorrect __cpuinit for mce_cpu_features() Impact: Bug fix on UP Checkin 6ec68bff3c81e776a455f6aca95c8c5f1d630198: x86, mce: reinitialize per cpu features on resume introduced a call to mce_cpu_features() in the resume path, in order for the MCE machinery to get properly reinitialized after a resume. However, this function (and its successors) was flagged __cpuinit, which becomes __init on UP configurations (on SMP suspend/resume requires CPU hotplug and so this would not be seen.) Remove the offending __cpuinit annotations for mce_cpu_features() and its successor functions. Cc: Andi Kleen Cc: Linus Torvalds Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 25cf624eccb..fe79985ce0f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) } -static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) +static void mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094..f2ee0ae29bd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr) } /* cpu init entry point, called from mce.c with preempt off */ -void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +void mce_amd_feature_init(struct cpuinfo_x86 *c) { unsigned int bank, block; unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd3..f44c3662436 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -30,7 +30,7 @@ asmlinkage void smp_thermal_interrupt(void) irq_exit(); } -static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) +static void intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; int tm2 = 0; @@ -84,7 +84,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) return; } -void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) +void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); } -- cgit v1.2.3-70-g09d2 From e6bd6760c92dc8475c79c4c4a8a16ac313c0b93d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 15 Feb 2009 22:45:49 +0100 Subject: x86_64: acpi/wakeup_64 cleanup - remove %ds re-set, it's already set in wakeup_long64 - remove double labels and alignment (ENTRY already adds both) - use meaningful resume point labelname - skip alignment while jumping from wakeup_long64 to the resume point - remove .size, .type and unused labels [v2] - added ENDPROCs Signed-off-by: Jiri Slaby Acked-by: Cyrill Gorcunov Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/acpi/wakeup_64.S | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index bcc293423a7..b5dee6a0de3 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -13,7 +13,6 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ ENTRY(wakeup_long64) -wakeup_long64: movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax @@ -34,16 +33,12 @@ wakeup_long64: movq saved_rip, %rax jmp *%rax +ENDPROC(wakeup_long64) bogus_64_magic: jmp bogus_64_magic - .align 2 - .p2align 4,,15 -.globl do_suspend_lowlevel - .type do_suspend_lowlevel,@function -do_suspend_lowlevel: -.LFB5: +ENTRY(do_suspend_lowlevel) subq $8, %rsp xorl %eax, %eax call save_processor_state @@ -67,7 +62,7 @@ do_suspend_lowlevel: pushfq popq pt_regs_flags(%rax) - movq $.L97, saved_rip(%rip) + movq $resume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -79,13 +74,9 @@ do_suspend_lowlevel: movl $3, %edi xorl %eax, %eax jmp acpi_enter_sleep_state -.L97: - .p2align 4,,7 -.L99: - .align 4 - movl $24, %eax - movw %ax, %ds + .align 4 +resume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx @@ -117,12 +108,9 @@ do_suspend_lowlevel: xorl %eax, %eax addq $8, %rsp jmp restore_processor_state -.LFE5: -.Lfe5: - .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel - +ENDPROC(do_suspend_lowlevel) + .data -ALIGN ENTRY(saved_rbp) .quad 0 ENTRY(saved_rsi) .quad 0 ENTRY(saved_rdi) .quad 0 -- cgit v1.2.3-70-g09d2 From 6defa2fe2019f3729933516fba5cfd75eecd07de Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sun, 15 Feb 2009 22:46:45 +0100 Subject: x86_64: Fix S3 fail path As acpi_enter_sleep_state can fail, take this into account in do_suspend_lowlevel and don't return to the do_suspend_lowlevel's caller. This would break (currently) fpu status and preempt count. Technically, this means use `call' instead of `jmp' and `jmp' to the `resume_point' after the `call' (i.e. if acpi_enter_sleep_state returns=fails). `resume_point' will handle the restore of fpu and preempt count gracefully. Signed-off-by: Jiri Slaby Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/acpi/wakeup_64.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index b5dee6a0de3..96258d9dc97 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -73,7 +73,9 @@ ENTRY(do_suspend_lowlevel) addq $8, %rsp movl $3, %edi xorl %eax, %eax - jmp acpi_enter_sleep_state + call acpi_enter_sleep_state + /* in case something went wrong, restore the machine status and go on */ + jmp resume_point .align 4 resume_point: -- cgit v1.2.3-70-g09d2 From b98103a5597b87211a1c74077b06faeac554bedc Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 21 Feb 2009 00:09:47 +0100 Subject: x86: hpet: print HPET registers during setup (if hpet=verbose is used) Signed-off-by: Andreas Herrmann Cc: Mark Hounschell Cc: Borislav Petkov Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 +++- arch/x86/kernel/hpet.c | 45 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b182626739e..01379a82264 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -492,10 +492,12 @@ and is between 256 and 4096 characters. It is defined in the file Default: 64 hpet= [X86-32,HPET] option to control HPET usage - Format: { enable (default) | disable | force } + Format: { enable (default) | disable | force | + verbose } disable: disable HPET and use PIT instead force: allow force enabled of undocumented chips (ICH4, VIA, nVidia) + verbose: show contents of HPET registers during setup com20020= [HW,NET] ARCnet - COM20020 chipset Format: diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a00545fe5cd..1d86ca3c1f9 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void) */ static int boot_hpet_disable; int hpet_force_user; +static int hpet_verbose; static int __init hpet_setup(char *str) { @@ -88,6 +89,8 @@ static int __init hpet_setup(char *str) boot_hpet_disable = 1; if (!strncmp("force", str, 5)) hpet_force_user = 1; + if (!strncmp("verbose", str, 7)) + hpet_verbose = 1; } return 1; } @@ -119,6 +122,43 @@ int is_hpet_enabled(void) } EXPORT_SYMBOL_GPL(is_hpet_enabled); +static void _hpet_print_config(const char *function, int line) +{ + u32 i, timers, l, h; + printk(KERN_INFO "hpet: %s(%d):\n", function, line); + l = hpet_readl(HPET_ID); + h = hpet_readl(HPET_PERIOD); + timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; + printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); + l = hpet_readl(HPET_CFG); + h = hpet_readl(HPET_STATUS); + printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); + l = hpet_readl(HPET_COUNTER); + h = hpet_readl(HPET_COUNTER+4); + printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + + for (i = 0; i < timers; i++) { + l = hpet_readl(HPET_Tn_CFG(i)); + h = hpet_readl(HPET_Tn_CFG(i)+4); + printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", + i, l, h); + l = hpet_readl(HPET_Tn_CMP(i)); + h = hpet_readl(HPET_Tn_CMP(i)+4); + printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", + i, l, h); + l = hpet_readl(HPET_Tn_ROUTE(i)); + h = hpet_readl(HPET_Tn_ROUTE(i)+4); + printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", + i, l, h); + } +} + +#define hpet_print_config() \ +do { \ + if (hpet_verbose) \ + _hpet_print_config(__FUNCTION__, __LINE__); \ +} while (0) + /* * When the hpet driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. @@ -282,6 +322,7 @@ static void hpet_set_mode(enum clock_event_mode mode, hpet_writel(cmp, HPET_Tn_CMP(timer)); udelay(1); hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_print_config(); break; case CLOCK_EVT_MODE_ONESHOT: @@ -308,6 +349,7 @@ static void hpet_set_mode(enum clock_event_mode mode, irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } + hpet_print_config(); break; } } @@ -526,6 +568,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); num_timers++; /* Value read out starts from 0 */ + hpet_print_config(); hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); if (!hpet_devs) @@ -793,6 +836,7 @@ int __init hpet_enable(void) * information and the number of channels */ id = hpet_readl(HPET_ID); + hpet_print_config(); #ifdef CONFIG_HPET_EMULATE_RTC /* @@ -845,6 +889,7 @@ static __init int hpet_late_init(void) return -ENODEV; hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + hpet_print_config(); for_each_online_cpu(cpu) { hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); -- cgit v1.2.3-70-g09d2 From 8d6f0c8214928f7c5083dd54ecb69c5d615b516e Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 21 Feb 2009 00:10:44 +0100 Subject: x86: hpet: provide separate functions to stop and start the counter By splitting up existing hpet_start_counter function. Signed-off-by: Andreas Herrmann Cc: Mark Hounschell Cc: Borislav Petkov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 1d86ca3c1f9..7ae1f1e8b9b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -231,27 +231,37 @@ static struct clock_event_device hpet_clockevent = { .rating = 50, }; -static void hpet_start_counter(void) +static void hpet_stop_counter(void) { unsigned long cfg = hpet_readl(HPET_CFG); - cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); hpet_writel(0, HPET_COUNTER); hpet_writel(0, HPET_COUNTER + 4); +} + +static void hpet_start_counter(void) +{ + unsigned long cfg = hpet_readl(HPET_CFG); cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } +static void hpet_restart_counter(void) +{ + hpet_stop_counter(); + hpet_start_counter(); +} + static void hpet_resume_device(void) { force_hpet_resume(); } -static void hpet_restart_counter(void) +static void hpet_resume_counter(void) { hpet_resume_device(); - hpet_start_counter(); + hpet_restart_counter(); } static void hpet_enable_legacy_int(void) @@ -738,7 +748,7 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_restart_counter, + .resume = hpet_resume_counter, #ifdef CONFIG_X86_64 .vread = vread_hpet, #endif @@ -750,7 +760,7 @@ static int hpet_clocksource_register(void) cycle_t t1; /* Start the counter */ - hpet_start_counter(); + hpet_restart_counter(); /* Verify whether hpet counter works */ t1 = read_hpet(); -- cgit v1.2.3-70-g09d2 From c23e253e67c9d8a91a0ffa33c1f571a17f0a2403 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Sat, 21 Feb 2009 00:16:35 +0100 Subject: x86: hpet: stop HPET_COUNTER when programming periodic mode Impact: fix system hang on some systems operating with HZ_1000 On a system that stalled with HZ_1000, the first value written to T0_CMP (when the main counter was not stopped) did not trigger an interrupt. Instead after the main counter wrapped around (after several minutes) an interrupt was triggered and afterwards the periodic interrupt took effect. This can be fixed by implementing HPET spec recommendation for programming the periodic mode (i.e. stopping the main counter). Signed-off-by: Andreas Herrmann Cc: Mark Hounschell Cc: Borislav Petkov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 7ae1f1e8b9b..648b3a2a3a4 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -309,29 +309,22 @@ static int hpet_setup_msi_irq(unsigned int irq); static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { - unsigned long cfg, cmp, now; + unsigned long cfg; uint64_t delta; switch (mode) { case CLOCK_EVT_MODE_PERIODIC: + hpet_stop_counter(); delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); /* Make sure we use edge triggered interrupts */ cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); - /* - * The first write after writing TN_SETVAL to the - * config register sets the counter value, the second - * write sets the period. - */ - hpet_writel(cmp, HPET_Tn_CMP(timer)); - udelay(1); hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_start_counter(); hpet_print_config(); break; -- cgit v1.2.3-70-g09d2 From 936577c61d0c10b8929608a92c98d839b22053bc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Feb 2009 10:27:49 -0800 Subject: x86: Add IRQF_TIMER to legacy x86 timer interrupt descriptors Right now nobody cares, but the suspend/resume code will eventually want to suspend device interrupts without suspending the timer, and will depend on this flag to know. The modern x86 timer infrastructure uses the local APIC timers and never shows up as a device interrupt at all, so it isn't affected and doesn't need any of this. Cc: Rafael J. Wysocki Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/kernel/time_64.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/mach-default/setup.c | 2 +- arch/x86/mach-voyager/setup.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e6e695acd72..241ec3923f6 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, .mask = CPU_MASK_NONE, .name = "timer" }; diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index bde106cae0a..e5b088fffa4 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -202,7 +202,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) static struct irqaction vmi_clock_action = { .name = "vmi-timer", .handler = vmi_timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, .mask = CPU_MASK_ALL, }; diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index a265a7c6319..50b59187112 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -96,7 +96,7 @@ void __init trap_init_hook(void) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, .mask = CPU_MASK_NONE, .name = "timer" }; diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index d914a7996a6..8e5118371f0 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -56,7 +56,7 @@ void __init trap_init_hook(void) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, .mask = CPU_MASK_NONE, .name = "timer" }; -- cgit v1.2.3-70-g09d2 From 770824bdc421ff58a64db608294323571c949f4c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 22 Feb 2009 18:38:50 +0100 Subject: PM: Split up sysdev_[suspend|resume] from device_power_[down|up] Move the sysdev_suspend/resume from the callee to the callers, with no real change in semantics, so that we can rework the disabling of interrupts during suspend/hibernation. This is based on an earlier patch from Linus. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- arch/x86/kernel/apm_32.c | 4 ++++ drivers/base/base.h | 2 -- drivers/base/power/main.c | 3 --- drivers/xen/manage.c | 8 ++++++++ include/linux/pm.h | 2 ++ kernel/kexec.c | 7 +++++++ kernel/power/disk.c | 11 +++++++++++ kernel/power/main.c | 8 ++++++-- 8 files changed, 38 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 98807bb095a..266ec6c18b6 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1192,6 +1192,7 @@ static int suspend(int vetoable) device_suspend(PMSG_SUSPEND); local_irq_disable(); device_power_down(PMSG_SUSPEND); + sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); @@ -1208,6 +1209,7 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; + sysdev_resume(); device_power_up(PMSG_RESUME); local_irq_enable(); device_resume(PMSG_RESUME); @@ -1228,6 +1230,7 @@ static void standby(void) local_irq_disable(); device_power_down(PMSG_SUSPEND); + sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); @@ -1235,6 +1238,7 @@ static void standby(void) apm_error("standby", err); local_irq_disable(); + sysdev_resume(); device_power_up(PMSG_RESUME); local_irq_enable(); } diff --git a/drivers/base/base.h b/drivers/base/base.h index 0a5f055dffb..9f50f1b545d 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -88,8 +88,6 @@ extern void driver_detach(struct device_driver *drv); extern int driver_probe_device(struct device_driver *drv, struct device *dev); extern void sysdev_shutdown(void); -extern int sysdev_suspend(pm_message_t state); -extern int sysdev_resume(void); extern char *make_class_name(const char *name, struct kobject *kobj); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 670c9d6c140..2d14f4ae6c0 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -333,7 +333,6 @@ static void dpm_power_up(pm_message_t state) */ void device_power_up(pm_message_t state) { - sysdev_resume(); dpm_power_up(state); } EXPORT_SYMBOL_GPL(device_power_up); @@ -577,8 +576,6 @@ int device_power_down(pm_message_t state) } dev->power.status = DPM_OFF_IRQ; } - if (!error) - error = sysdev_suspend(state); if (error) dpm_power_up(resume_event(state)); return error; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 9b91617b958..56892a142ee 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -45,6 +45,13 @@ static int xen_suspend(void *data) err); return err; } + err = sysdev_suspend(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n", + err); + device_power_up(PMSG_RESUME); + return err; + } xen_mm_pin_all(); gnttab_suspend(); @@ -61,6 +68,7 @@ static int xen_suspend(void *data) gnttab_resume(); xen_mm_unpin_all(); + sysdev_resume(); device_power_up(PMSG_RESUME); if (!*cancelled) { diff --git a/include/linux/pm.h b/include/linux/pm.h index de2e0a8f672..24ba5f67b3a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -381,10 +381,12 @@ struct dev_pm_info { #ifdef CONFIG_PM_SLEEP extern void device_pm_lock(void); +extern int sysdev_resume(void); extern void device_power_up(pm_message_t state); extern void device_resume(pm_message_t state); extern void device_pm_unlock(void); +extern int sysdev_suspend(pm_message_t state); extern int device_power_down(pm_message_t state); extern int device_suspend(pm_message_t state); extern int device_prepare_suspend(pm_message_t state); diff --git a/kernel/kexec.c b/kernel/kexec.c index 8a6d7b08864..48389957825 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1465,6 +1465,11 @@ int kernel_kexec(void) error = device_power_down(PMSG_FREEZE); if (error) goto Enable_irqs; + + /* Suspend system devices */ + error = sysdev_suspend(PMSG_FREEZE); + if (error) + goto Power_up_devices; } else #endif { @@ -1477,6 +1482,8 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + sysdev_resume(); + Power_up_devices: device_power_up(PMSG_RESTORE); Enable_irqs: local_irq_enable(); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 7b40e94b1d4..4a4a206b197 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -227,6 +227,12 @@ static int create_image(int platform_mode) "aborting hibernation\n"); goto Enable_irqs; } + sysdev_suspend(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting hibernation\n"); + goto Power_up_devices; + } if (hibernation_test(TEST_CORE)) goto Power_up; @@ -242,9 +248,11 @@ static int create_image(int platform_mode) if (!in_suspend) platform_leave(platform_mode); Power_up: + sysdev_resume(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ + Power_up_devices: device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Enable_irqs: @@ -335,6 +343,7 @@ static int resume_target_kernel(void) "aborting resume\n"); goto Enable_irqs; } + sysdev_suspend(PMSG_QUIESCE); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); error = restore_highmem(); @@ -357,6 +366,7 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); + sysdev_resume(); device_power_up(PMSG_RECOVER); Enable_irqs: local_irq_enable(); @@ -440,6 +450,7 @@ int hibernation_platform_enter(void) local_irq_disable(); error = device_power_down(PMSG_HIBERNATE); if (!error) { + sysdev_suspend(PMSG_HIBERNATE); hibernation_ops->enter(); /* We should never get here */ while (1); diff --git a/kernel/power/main.c b/kernel/power/main.c index b4d219016b6..c9632f841f6 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state) goto Done; } - if (!suspend_test(TEST_CORE)) - error = suspend_ops->enter(state); + error = sysdev_suspend(PMSG_SUSPEND); + if (!error) { + if (!suspend_test(TEST_CORE)) + error = suspend_ops->enter(state); + sysdev_resume(); + } device_power_up(PMSG_RESUME); Done: -- cgit v1.2.3-70-g09d2 From ddf9499b3d1e655f212f22b0a703506fcac90b25 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 23 Feb 2009 21:34:47 +0000 Subject: x86, Voyager: fix compile by lifting the degeneracy of phys_cpu_present_map This was changed to a physmap_t giving a clashing symbol redefinition, but actually using a physmap_t consumes rather a lot of space on x86, so stick with a private copy renamed with a voyager_ prefix and made static. Nothing outside of the Voyager code uses it, anyway. Signed-off-by: James Bottomley Signed-off-by: H. Peter Anvin --- arch/x86/mach-voyager/voyager_smp.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 7ffcdeec463..b9cc84a2a4f 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -65,7 +65,7 @@ static volatile unsigned long smp_invalidate_needed; /* Bitmask of CPUs present in the system - exported by i386_syms.c, used * by scheduler but indexed physically */ -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +static cpumask_t voyager_phys_cpu_present_map = CPU_MASK_NONE; /* The internal functions */ static void send_CPI(__u32 cpuset, __u8 cpi); @@ -366,19 +366,19 @@ void __init find_smp_config(void) /* set up everything for just this CPU, we can alter * this as we start the other CPUs later */ /* now get the CPU disposition from the extended CMOS */ - cpus_addr(phys_cpu_present_map)[0] = + cpus_addr(voyager_phys_cpu_present_map)[0] = voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK); - cpus_addr(phys_cpu_present_map)[0] |= + cpus_addr(voyager_phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; - cpus_addr(phys_cpu_present_map)[0] |= + cpus_addr(voyager_phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 2) << 16; - cpus_addr(phys_cpu_present_map)[0] |= + cpus_addr(voyager_phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24; - init_cpu_possible(&phys_cpu_present_map); - printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", - cpus_addr(phys_cpu_present_map)[0]); + init_cpu_possible(&voyager_phys_cpu_present_map); + printk("VOYAGER SMP: voyager_phys_cpu_present_map = 0x%lx\n", + cpus_addr(voyager_phys_cpu_present_map)[0]); /* Here we set up the VIC to enable SMP */ /* enable the CPIs by writing the base vector to their register */ outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER); @@ -628,15 +628,15 @@ void __init smp_boot_cpus(void) /* now that the cat has probed the Voyager System Bus, sanity * check the cpu map */ if (((voyager_quad_processors | voyager_extended_vic_processors) - & cpus_addr(phys_cpu_present_map)[0]) != - cpus_addr(phys_cpu_present_map)[0]) { + & cpus_addr(voyager_phys_cpu_present_map)[0]) != + cpus_addr(voyager_phys_cpu_present_map)[0]) { /* should panic */ printk("\n\n***WARNING*** " "Sanity check of CPU present map FAILED\n"); } } else if (voyager_level == 4) voyager_extended_vic_processors = - cpus_addr(phys_cpu_present_map)[0]; + cpus_addr(voyager_phys_cpu_present_map)[0]; /* this sets up the idle task to run on the current cpu */ voyager_extended_cpus = 1; @@ -670,7 +670,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. The * Quad CPUs must be bootstrapped by their extended VIC cpu */ for (i = 0; i < nr_cpu_ids; i++) { - if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) + if (i == boot_cpu_id || !cpu_isset(i, voyager_phys_cpu_present_map)) continue; do_boot_cpu(i); /* This udelay seems to be needed for the Quad boots -- cgit v1.2.3-70-g09d2 From fff78ad5cee1d6f695103ec590cbd2a9f3c39e8c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:28:42 -0500 Subject: [CPUFREQ] Stupidly trivial CodingStyle fix GNU indent complains about this being ambiguous, because it's dumb. One of my automated tests relies on the output of indent, so this shuts it up. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 1b446d79a8f..9cd73d15743 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -94,7 +94,7 @@ static struct cpufreq_frequency_table *powernow_table; static unsigned int can_scale_bus; static unsigned int can_scale_vid; -static unsigned int minimum_speed=-1; +static unsigned int minimum_speed = -1; static unsigned int maximum_speed; static unsigned int number_scales; static unsigned int fsb; -- cgit v1.2.3-70-g09d2 From b5c916666240032b29f73a1ca52c3e0fac37335c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:39:47 -0500 Subject: [CPUFREQ] checkpatch cleanups for cpufreq-nforce2 Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 38 +++++++++++++++------------ 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 965ea52767a..ad8284f0e86 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -32,7 +32,7 @@ * nforce2_chipset: * FSB is changed using the chipset */ -static struct pci_dev *nforce2_chipset_dev; +static struct pci_dev *nforce2_dev; /* fid: * multiplier * 10 @@ -56,7 +56,8 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "cpufreq-nforce2", msg) /** * nforce2_calc_fsb - calculate FSB @@ -118,11 +119,11 @@ static void nforce2_write_pll(int pll) int temp; /* Set the pll addr. to 0x00 */ - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); + pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0); /* Now write the value in all 64 registers */ for (temp = 0; temp <= 0x3f; temp++) - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); + pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll); return; } @@ -139,8 +140,8 @@ static unsigned int nforce2_fsb_read(int bootfsb) u32 fsb, temp = 0; /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ - nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); + nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (!nforce2_sub5) return 0; @@ -148,13 +149,13 @@ static unsigned int nforce2_fsb_read(int bootfsb) fsb /= 1000000; /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (bootfsb || !temp) return fsb; /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; @@ -185,7 +186,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -197,7 +198,7 @@ static int nforce2_set_fsb(unsigned int fsb) /* Enable write access */ temp = 0x01; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); + pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp); diff = tfsb - fsb; @@ -222,7 +223,7 @@ static int nforce2_set_fsb(unsigned int fsb) } temp = 0x40; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); + pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp); return 0; } @@ -244,7 +245,8 @@ static unsigned int nforce2_get(unsigned int cpu) * nforce2_target - set a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * Sets a new CPUFreq policy. */ @@ -328,7 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) if (!fid) { if (!cpu_khz) { printk(KERN_WARNING - "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); + "cpufreq: cpu_khz not set, " + "can't calculate multiplier!\n"); return -ENODEV; } @@ -392,17 +395,18 @@ static struct cpufreq_driver nforce2_driver = { */ static unsigned int nforce2_detect_chipset(void) { - nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, PCI_ANY_ID, PCI_ANY_ID, NULL); - if (nforce2_chipset_dev == NULL) + if (nforce2_dev == NULL) return -ENODEV; printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", - nforce2_chipset_dev->revision); + nforce2_dev->revision); printk(KERN_INFO - "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); + "cpufreq: FSB changing is maybe unstable and can lead to " + "crashes and data loss.\n"); return 0; } -- cgit v1.2.3-70-g09d2 From 20174b65d9fdc8dddef3d2ab9647e01fdab13064 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:42:19 -0500 Subject: [CPUFREQ] nforce2: Use driver prefix, not cpufreq prefix. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index ad8284f0e86..99262906838 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -56,6 +56,7 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); MODULE_PARM_DESC(min_fsb, "Minimum FSB to use, if not defined: current FSB - 50"); +#define PFX "cpufreq-nforce2: " #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ "cpufreq-nforce2", msg) @@ -175,13 +176,13 @@ static int nforce2_set_fsb(unsigned int fsb) int pll = 0; if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { - printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); + printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb); return -EINVAL; } tfsb = nforce2_fsb_read(0); if (!tfsb) { - printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); + printk(KERN_ERR PFX "Error while reading the FSB\n"); return -EINVAL; } @@ -278,7 +279,7 @@ static int nforce2_target(struct cpufreq_policy *policy, /* local_irq_save(flags); */ if (nforce2_set_fsb(target_fsb) < 0) - printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", + printk(KERN_ERR PFX "Changing FSB to %d failed\n", target_fsb); else dprintk("Changed FSB successfully to %d\n", @@ -329,9 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* FIX: Get FID from CPU */ if (!fid) { if (!cpu_khz) { - printk(KERN_WARNING - "cpufreq: cpu_khz not set, " - "can't calculate multiplier!\n"); + printk(KERN_WARNING PFX + "cpu_khz not set, can't calculate multiplier!\n"); return -ENODEV; } @@ -346,7 +346,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) } } - printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, + printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb, fid / 10, fid % 10); /* Set maximum FSB to FSB at boot time */ @@ -402,10 +402,10 @@ static unsigned int nforce2_detect_chipset(void) if (nforce2_dev == NULL) return -ENODEV; - printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", + printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n", nforce2_dev->revision); - printk(KERN_INFO - "cpufreq: FSB changing is maybe unstable and can lead to " + printk(KERN_INFO PFX + "FSB changing is maybe unstable and can lead to " "crashes and data loss.\n"); return 0; @@ -424,7 +424,7 @@ static int __init nforce2_init(void) /* detect chipset */ if (nforce2_detect_chipset()) { - printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); + printk(KERN_ERR PFX "No nForce2 chipset.\n"); return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 04cd1a99dc22bcaa1eccbe8f8386df8c1295e979 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:50:33 -0500 Subject: [CPUFREQ] checkpatch cleanups for elanfreq The remaining warning about the simple_strtoul conversion to strict_strtoul seems kind of pointless to me. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index fe613c93b36..006b278b0d5 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -184,7 +184,8 @@ static int elanfreq_target(struct cpufreq_policy *policy, { unsigned int newstate = 0; - if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], + target_freq, relation, &newstate)) return -EINVAL; elanfreq_set_cpu_state(newstate); @@ -301,7 +302,8 @@ static void __exit elanfreq_exit(void) module_param(max_freq, int, 0444); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); +MODULE_AUTHOR("Robert Schwebel , " + "Sven Geggus "); MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); module_init(elanfreq_init); -- cgit v1.2.3-70-g09d2 From c9b8c871525aeda153494996d84a832270205d81 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:54:47 -0500 Subject: [CPUFREQ] checkpatch cleanups for e_powersaver Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c2f930d8664..3f83ea12c47 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -12,12 +12,12 @@ #include #include #include +#include +#include +#include #include #include -#include -#include -#include #define EPS_BRAND_C7M 0 #define EPS_BRAND_C7 1 @@ -184,7 +184,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy) break; } - switch(brand) { + switch (brand) { case EPS_BRAND_C7M: printk(KERN_CONT "C7-M\n"); break; @@ -218,17 +218,20 @@ static int eps_cpu_init(struct cpufreq_policy *policy) /* Print voltage and multiplier */ rdmsr(MSR_IA32_PERF_STATUS, lo, hi); current_voltage = lo & 0xff; - printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700); + printk(KERN_INFO "eps: Current voltage = %dmV\n", + current_voltage * 16 + 700); current_multiplier = (lo >> 8) & 0xff; printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); /* Print limits */ max_voltage = hi & 0xff; - printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); + printk(KERN_INFO "eps: Highest voltage = %dmV\n", + max_voltage * 16 + 700); max_multiplier = (hi >> 8) & 0xff; printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); min_voltage = (hi >> 16) & 0xff; - printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); + printk(KERN_INFO "eps: Lowest voltage = %dmV\n", + min_voltage * 16 + 700); min_multiplier = (hi >> 24) & 0xff; printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); @@ -318,7 +321,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy) return 0; } -static struct freq_attr* eps_attr[] = { +static struct freq_attr *eps_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -356,7 +359,7 @@ static void __exit eps_exit(void) cpufreq_unregister_driver(&eps_driver); } -MODULE_AUTHOR("Rafa³ Bilski "); +MODULE_AUTHOR("Rafal Bilski "); MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 00f6a235bf241e62dfadf32b4322309e65c7b177 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:06:57 -0500 Subject: [CPUFREQ] checkpatch cleanups for gx-suspmod Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 105 +++++++++++++++++++------------ 1 file changed, 65 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 9d9eae82e60..ac27ec2264d 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -79,8 +79,9 @@ #include #include #include +#include + #include -#include /* PCI config registers, all at F0 */ #define PCI_PMER1 0x80 /* power management enable register 1 */ @@ -122,8 +123,8 @@ static struct gxfreq_params *gx_params; static int stock_freq; /* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ -static int pci_busclk = 0; -module_param (pci_busclk, int, 0444); +static int pci_busclk; +module_param(pci_busclk, int, 0444); /* maximum duration for which the cpu may be suspended * (32us * MAX_DURATION). If no parameter is given, this defaults @@ -132,7 +133,7 @@ module_param (pci_busclk, int, 0444); * is suspended -- processing power is just 0.39% of what it used to be, * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ static int max_duration = 255; -module_param (max_duration, int, 0444); +module_param(max_duration, int, 0444); /* For the default policy, we want at least some processing power * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) @@ -140,7 +141,8 @@ module_param (max_duration, int, 0444); #define POLICY_MIN_DIV 20 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "gx-suspmod", msg) /** * we can detect a core multipiler from dir0_lsb @@ -166,12 +168,20 @@ static int gx_freq_mult[16] = { * Low Level chipset interface * ****************************************************************/ static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, + PCI_ANY_ID, PCI_ANY_ID }, { 0, }, }; +static void gx_write_byte(int reg, int value) +{ + pci_write_config_byte(gx_params->cs55x0, reg, value); +} + /** * gx_detect_chipset: * @@ -200,7 +210,8 @@ static __init struct pci_dev *gx_detect_chipset(void) /** * gx_get_cpuspeed: * - * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. + * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi + * Geode CPU runs. */ static unsigned int gx_get_cpuspeed(unsigned int cpu) { @@ -217,17 +228,18 @@ static unsigned int gx_get_cpuspeed(unsigned int cpu) * **/ -static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) +static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, + u8 *off_duration) { unsigned int i; u8 tmp_on, tmp_off; int old_tmp_freq = stock_freq; int tmp_freq; - *off_duration=1; - *on_duration=0; + *off_duration = 1; + *on_duration = 0; - for (i=max_duration; i>0; i--) { + for (i = max_duration; i > 0; i--) { tmp_off = ((khz * i) / stock_freq) & 0xff; tmp_on = i - tmp_off; tmp_freq = (stock_freq * tmp_off) / i; @@ -259,26 +271,34 @@ static void gx_set_cpuspeed(unsigned int khz) freqs.cpu = 0; freqs.old = gx_get_cpuspeed(0); - new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); + new_khz = gx_validate_speed(khz, &gx_params->on_duration, + &gx_params->off_duration); freqs.new = new_khz; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); local_irq_save(flags); - if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ + + + if (new_khz != stock_freq) { + /* if new khz == 100% of CPU speed, it is special case */ switch (gx_params->cs55x0->device) { case PCI_DEVICE_ID_CYRIX_5530_LEGACY: pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; /* FIXME: need to test other values -- Zwane,Miura */ - pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - - if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ - suscfg = gx_params->pci_suscfg | SUSMOD; - } else { /* CS5530A,B.. */ - suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + /* typical 2 to 4ms */ + gx_write_byte(PCI_IRQTC, 4); + /* typical 50 to 100ms */ + gx_write_byte(PCI_VIDTC, 100); + gx_write_byte(PCI_PMER1, pmer1); + + if (gx_params->cs55x0->revision < 0x10) { + /* CS5530(rev 1.2, 1.3) */ + suscfg = gx_params->pci_suscfg|SUSMOD; + } else { + /* CS5530A,B.. */ + suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE; } break; case PCI_DEVICE_ID_CYRIX_5520: @@ -294,13 +314,13 @@ static void gx_set_cpuspeed(unsigned int khz) suscfg = gx_params->pci_suscfg & ~(SUSMOD); gx_params->off_duration = 0; gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); + dprintk("suspend modulation disabled: cpu runs 100%% speed.\n"); } - pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); + gx_write_byte(PCI_MODOFF, gx_params->off_duration); + gx_write_byte(PCI_MODON, gx_params->on_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + gx_write_byte(PCI_SUSCFG, suscfg); pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); local_irq_restore(flags); @@ -334,7 +354,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) return -EINVAL; policy->cpu = 0; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), + stock_freq); /* it needs to be assured that at least one supported frequency is * within policy->min and policy->max. If it is not, policy->max @@ -354,7 +375,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) policy->max = tmp_freq; if (policy->max < policy->min) policy->max = policy->min; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), + stock_freq); return 0; } @@ -398,18 +420,18 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) return -ENODEV; /* determine maximum frequency */ - if (pci_busclk) { + if (pci_busclk) maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } else if (cpu_khz) { + else if (cpu_khz) maxfreq = cpu_khz; - } else { + else maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } + stock_freq = maxfreq; curfreq = gx_get_cpuspeed(0); dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n",curfreq); + dprintk("cpu current frequency is %dkHz.\n", curfreq); /* setup basic struct for cpufreq API */ policy->cpu = 0; @@ -447,7 +469,8 @@ static int __init cpufreq_gx_init(void) struct pci_dev *gx_pci; /* Test if we have the right hardware */ - if ((gx_pci = gx_detect_chipset()) == NULL) + gx_pci = gx_detect_chipset(); + if (gx_pci == NULL) return -ENODEV; /* check whether module parameters are sane */ @@ -468,9 +491,11 @@ static int __init cpufreq_gx_init(void) pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); - pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); + pci_read_config_byte(params->cs55x0, PCI_MODOFF, + &(params->off_duration)); - if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + ret = cpufreq_register_driver(&gx_suspmod_driver); + if (ret) { kfree(params); return ret; /* register error! */ } @@ -485,9 +510,9 @@ static void __exit cpufreq_gx_exit(void) kfree(gx_params); } -MODULE_AUTHOR ("Hiroshi Miura "); -MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Hiroshi Miura "); +MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); +MODULE_LICENSE("GPL"); module_init(cpufreq_gx_init); module_exit(cpufreq_gx_exit); -- cgit v1.2.3-70-g09d2 From ac617bd0f7b959dc6708ad2f0d6b9dcf4382f1ed Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:29:53 -0500 Subject: [CPUFREQ] checkpatch cleanups for longhaul Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 182 +++++++++++++++++---------------- arch/x86/kernel/cpu/cpufreq/longhaul.h | 12 +-- 2 files changed, 100 insertions(+), 94 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index a4cff5d6e38..dc03622a83a 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -30,12 +30,12 @@ #include #include #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include #include "longhaul.h" @@ -58,7 +58,7 @@ #define USE_NORTHBRIDGE (1 << 2) static int cpu_model; -static unsigned int numscales=16; +static unsigned int numscales = 16; static unsigned int fsb; static const struct mV_pos *vrm_mV_table; @@ -67,8 +67,8 @@ static const unsigned char *mV_vrm_table; static unsigned int highest_speed, lowest_speed; /* kHz */ static unsigned int minmult, maxmult; static int can_scale_voltage; -static struct acpi_processor *pr = NULL; -static struct acpi_processor_cx *cx = NULL; +static struct acpi_processor *pr; +static struct acpi_processor_cx *cx; static u32 acpi_regs_addr; static u8 longhaul_flags; static unsigned int longhaul_index; @@ -78,12 +78,13 @@ static int scale_voltage; static int disable_acpi_c3; static int revid_errata; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "longhaul", msg) /* Clock ratios multiplied by 10 */ -static int clock_ratio[32]; -static int eblcr_table[32]; +static int mults[32]; +static int eblcr[32]; static int longhaul_version; static struct cpufreq_frequency_table *longhaul_table; @@ -93,7 +94,7 @@ static char speedbuffer[8]; static char *print_speed(int speed) { if (speed < 1000) { - snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); + snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed); return speedbuffer; } @@ -122,27 +123,28 @@ static unsigned int calc_speed(int mult) static int longhaul_get_cpu_mult(void) { - unsigned long invalue=0,lo, hi; + unsigned long invalue = 0, lo, hi; - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; - if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { + rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi); + invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22; + if (longhaul_version == TYPE_LONGHAUL_V2 || + longhaul_version == TYPE_POWERSAVER) { if (lo & (1<<27)) - invalue+=16; + invalue += 16; } - return eblcr_table[invalue]; + return eblcr[invalue]; } /* For processor with BCR2 MSR */ -static void do_longhaul1(unsigned int clock_ratio_index) +static void do_longhaul1(unsigned int mults_index) { union msr_bcr2 bcr2; rdmsrl(MSR_VIA_BCR2, bcr2.val); /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; + bcr2.bits.CLOCKMUL = mults_index & 0xff; /* Sync to timer tick */ safe_halt(); @@ -161,7 +163,7 @@ static void do_longhaul1(unsigned int clock_ratio_index) /* For processor with Longhaul MSR */ -static void do_powersaver(int cx_address, unsigned int clock_ratio_index, +static void do_powersaver(int cx_address, unsigned int mults_index, unsigned int dir) { union msr_longhaul longhaul; @@ -173,11 +175,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index, longhaul.bits.RevisionKey = longhaul.bits.RevisionID; else longhaul.bits.RevisionKey = 0; - longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; - longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + longhaul.bits.SoftBusRatio = mults_index & 0xf; + longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4; /* Setup new voltage */ if (can_scale_voltage) - longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; + longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f; /* Sync to timer tick */ safe_halt(); /* Raise voltage if necessary */ @@ -240,14 +242,14 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index, /** * longhaul_set_cpu_frequency() - * @clock_ratio_index : bitpattern of the new multiplier. + * @mults_index : bitpattern of the new multiplier. * * Sets a new clock ratio. */ static void longhaul_setstate(unsigned int table_index) { - unsigned int clock_ratio_index; + unsigned int mults_index; int speed, mult; struct cpufreq_freqs freqs; unsigned long flags; @@ -256,9 +258,9 @@ static void longhaul_setstate(unsigned int table_index) u32 bm_timeout = 1000; unsigned int dir = 0; - clock_ratio_index = longhaul_table[table_index].index; + mults_index = longhaul_table[table_index].index; /* Safety precautions */ - mult = clock_ratio[clock_ratio_index & 0x1f]; + mult = mults[mults_index & 0x1f]; if (mult == -1) return; speed = calc_speed(mult); @@ -274,7 +276,7 @@ static void longhaul_setstate(unsigned int table_index) cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); retry_loop: preempt_disable(); @@ -282,8 +284,8 @@ retry_loop: pic2_mask = inb(0xA1); pic1_mask = inb(0x21); /* works on C3. save mask. */ - outb(0xFF,0xA1); /* Overkill */ - outb(0xFE,0x21); /* TMR0 only */ + outb(0xFF, 0xA1); /* Overkill */ + outb(0xFE, 0x21); /* TMR0 only */ /* Wait while PCI bus is busy. */ if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE @@ -312,7 +314,7 @@ retry_loop: * Software controlled multipliers only. */ case TYPE_LONGHAUL_V1: - do_longhaul1(clock_ratio_index); + do_longhaul1(mults_index); break; /* @@ -327,9 +329,9 @@ retry_loop: if (longhaul_flags & USE_ACPI_C3) { /* Don't allow wakeup */ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, clock_ratio_index, dir); + do_powersaver(cx->address, mults_index, dir); } else { - do_powersaver(0, clock_ratio_index, dir); + do_powersaver(0, mults_index, dir); } break; } @@ -341,8 +343,8 @@ retry_loop: /* Enable bus master arbitration */ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); } - outb(pic2_mask,0xA1); /* restore mask */ - outb(pic1_mask,0x21); + outb(pic2_mask, 0xA1); /* restore mask */ + outb(pic1_mask, 0x21); local_irq_restore(flags); preempt_enable(); @@ -392,7 +394,8 @@ retry_loop: cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); if (!bm_timeout) - printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); + printk(KERN_INFO PFX "Warning: Timeout while waiting for " + "idle PCI bus.\n"); } /* @@ -458,31 +461,32 @@ static int __init longhaul_get_ranges(void) break; } - dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", + dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n", minmult/10, minmult%10, maxmult/10, maxmult%10); highest_speed = calc_speed(maxmult); lowest_speed = calc_speed(minmult); - dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, print_speed(lowest_speed/1000), print_speed(highest_speed/1000)); if (lowest_speed == highest_speed) { - printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); + printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n"); return -EINVAL; } if (lowest_speed > highest_speed) { - printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", + printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", lowest_speed, highest_speed); return -EINVAL; } - longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); - if(!longhaul_table) + longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table), + GFP_KERNEL); + if (!longhaul_table) return -ENOMEM; for (j = 0; j < numscales; j++) { - ratio = clock_ratio[j]; + ratio = mults[j]; if (ratio == -1) continue; if (ratio > maxmult || ratio < minmult) @@ -521,7 +525,7 @@ static int __init longhaul_get_ranges(void) /* Find index we are running on */ for (j = 0; j < k; j++) { - if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + if (mults[longhaul_table[j].index & 0x1f] == mult) { longhaul_index = j; break; } @@ -559,20 +563,22 @@ static void __init longhaul_setup_voltagescaling(void) maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { - printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " + printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " "Voltage scaling disabled.\n", - minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); + minvid.mV/1000, minvid.mV%1000, + maxvid.mV/1000, maxvid.mV%1000); return; } if (minvid.mV == maxvid.mV) { - printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " - "both %d.%03d. Voltage scaling disabled\n", + printk(KERN_INFO PFX "Claims to support voltage scaling but " + "min & max are both %d.%03d. " + "Voltage scaling disabled\n", maxvid.mV/1000, maxvid.mV%1000); return; } - /* How many voltage steps */ + /* How many voltage steps*/ numvscales = maxvid.pos - minvid.pos + 1; printk(KERN_INFO PFX "Max VID=%d.%03d " @@ -586,7 +592,7 @@ static void __init longhaul_setup_voltagescaling(void) j = longhaul.bits.MinMHzBR; if (longhaul.bits.MinMHzBR4) j += 16; - min_vid_speed = eblcr_table[j]; + min_vid_speed = eblcr[j]; if (min_vid_speed == -1) return; switch (longhaul.bits.MinMHzFSB) { @@ -617,7 +623,8 @@ static void __init longhaul_setup_voltagescaling(void) pos = minvid.pos; longhaul_table[j].index |= mV_vrm_table[pos] << 8; vid = vrm_mV_table[mV_vrm_table[pos]]; - printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", + speed, j, vid.mV); j++; } @@ -640,7 +647,8 @@ static int longhaul_target(struct cpufreq_policy *policy, unsigned int dir = 0; u8 vid, current_vid; - if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) + if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, + relation, &table_index)) return -EINVAL; /* Don't set same frequency again */ @@ -656,7 +664,8 @@ static int longhaul_target(struct cpufreq_policy *policy, * this in hardware, C3 is old and we need to do this * in software. */ i = longhaul_index; - current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + current_vid = (longhaul_table[longhaul_index].index >> 8); + current_vid &= 0x1f; if (table_index > longhaul_index) dir = 1; while (i != table_index) { @@ -691,9 +700,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, { struct acpi_device *d; - if ( acpi_bus_get_device(obj_handle, &d) ) { + if (acpi_bus_get_device(obj_handle, &d)) return 0; - } + *return_value = acpi_driver_data(d); return 1; } @@ -750,7 +759,7 @@ static int longhaul_setup_southbridge(void) /* Find VT8235 southbridge */ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); if (dev == NULL) - /* Find VT8237 southbridge */ + /* Find VT8237 southbridge */ dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, NULL); if (dev != NULL) { @@ -769,7 +778,8 @@ static int longhaul_setup_southbridge(void) if (pci_cmd & 1 << 7) { pci_read_config_dword(dev, 0x88, &acpi_regs_addr); acpi_regs_addr &= 0xff00; - printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", + acpi_regs_addr); } pci_dev_put(dev); @@ -781,7 +791,7 @@ static int longhaul_setup_southbridge(void) static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { struct cpuinfo_x86 *c = &cpu_data(0); - char *cpuname=NULL; + char *cpuname = NULL; int ret; u32 lo, hi; @@ -791,8 +801,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpu_model = CPU_SAMUEL; cpuname = "C3 'Samuel' [C5A]"; longhaul_version = TYPE_LONGHAUL_V1; - memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); - memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); + memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); + memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr)); break; case 7: @@ -803,10 +813,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpuname = "C3 'Samuel 2' [C5B]"; /* Note, this is not a typo, early Samuel2's had * Samuel1 ratios. */ - memcpy(clock_ratio, samuel1_clock_ratio, - sizeof(samuel1_clock_ratio)); - memcpy(eblcr_table, samuel2_eblcr, - sizeof(samuel2_eblcr)); + memcpy(mults, samuel1_mults, sizeof(samuel1_mults)); + memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); break; case 1 ... 15: longhaul_version = TYPE_LONGHAUL_V1; @@ -817,10 +825,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpu_model = CPU_EZRA; cpuname = "C3 'Ezra' [C5C]"; } - memcpy(clock_ratio, ezra_clock_ratio, - sizeof(ezra_clock_ratio)); - memcpy(eblcr_table, ezra_eblcr, - sizeof(ezra_eblcr)); + memcpy(mults, ezra_mults, sizeof(ezra_mults)); + memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr)); break; } break; @@ -829,18 +835,16 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpu_model = CPU_EZRA_T; cpuname = "C3 'Ezra-T' [C5M]"; longhaul_version = TYPE_POWERSAVER; - numscales=32; - memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); - memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); + numscales = 32; + memcpy(mults, ezrat_mults, sizeof(ezrat_mults)); + memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr)); break; case 9: longhaul_version = TYPE_POWERSAVER; numscales = 32; - memcpy(clock_ratio, - nehemiah_clock_ratio, - sizeof(nehemiah_clock_ratio)); - memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); + memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults)); + memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr)); switch (c->x86_mask) { case 0 ... 1: cpu_model = CPU_NEHEMIAH; @@ -869,14 +873,14 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) longhaul_version = TYPE_LONGHAUL_V1; } - printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); + printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname); switch (longhaul_version) { case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - printk ("Longhaul v%d supported.\n", longhaul_version); + printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version); break; case TYPE_POWERSAVER: - printk ("Powersaver supported.\n"); + printk(KERN_CONT "Powersaver supported.\n"); break; }; @@ -940,7 +944,7 @@ static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) return 0; } -static struct freq_attr* longhaul_attr[] = { +static struct freq_attr *longhaul_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -966,13 +970,15 @@ static int __init longhaul_init(void) #ifdef CONFIG_SMP if (num_online_cpus() > 1) { - printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + printk(KERN_ERR PFX "More than 1 CPU detected, " + "longhaul disabled.\n"); return -ENODEV; } #endif #ifdef CONFIG_X86_IO_APIC if (cpu_has_apic) { - printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + printk(KERN_ERR PFX "APIC detected. Longhaul is currently " + "broken in this configuration.\n"); return -ENODEV; } #endif @@ -993,8 +999,8 @@ static void __exit longhaul_exit(void) { int i; - for (i=0; i < numscales; i++) { - if (clock_ratio[i] == maxmult) { + for (i = 0; i < numscales; i++) { + if (mults[i] == maxmult) { longhaul_setstate(i); break; } @@ -1007,11 +1013,11 @@ static void __exit longhaul_exit(void) /* Even if BIOS is exporting ACPI C3 state, and it is used * with success when CPU is idle, this state doesn't * trigger frequency transition in some cases. */ -module_param (disable_acpi_c3, int, 0644); +module_param(disable_acpi_c3, int, 0644); MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); /* Change CPU voltage with frequency. Very usefull to save * power, but most VIA C3 processors aren't supporting it. */ -module_param (scale_voltage, int, 0644); +module_param(scale_voltage, int, 0644); MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); /* Force revision key to 0 for processors which doesn't * support voltage scaling, but are introducing itself as @@ -1019,9 +1025,9 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(revid_errata, int, 0644); MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dave Jones "); +MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors."); +MODULE_LICENSE("GPL"); late_initcall(longhaul_init); module_exit(longhaul_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index 4fcc320997d..e2360a469f7 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -49,14 +49,14 @@ union msr_longhaul { /* * Clock ratio tables. Div/Mod by 10 to get ratio. - * The eblcr ones specify the ratio read from the CPU. - * The clock_ratio ones specify what to write to the CPU. + * The eblcr values specify the ratio read from the CPU. + * The mults values specify what to write to the CPU. */ /* * VIA C3 Samuel 1 & Samuel 2 (stepping 0) */ -static const int __initdata samuel1_clock_ratio[16] = { +static const int __initdata samuel1_mults[16] = { -1, /* 0000 -> RESERVED */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { /* * VIA C3 Ezra */ -static const int __initdata ezra_clock_ratio[16] = { +static const int __initdata ezra_mults[16] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { /* * VIA C3 (Ezra-T) [C5M]. */ -static const int __initdata ezrat_clock_ratio[32] = { +static const int __initdata ezrat_mults[32] = { 100, /* 0000 -> 10.0x */ 30, /* 0001 -> 3.0x */ 40, /* 0010 -> 4.0x */ @@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { /* * VIA C3 Nehemiah */ -static const int __initdata nehemiah_clock_ratio[32] = { +static const int __initdata nehemiah_mults[32] = { 100, /* 0000 -> 10.0x */ -1, /* 0001 -> 16.0x */ 40, /* 0010 -> 4.0x */ -- cgit v1.2.3-70-g09d2 From 48ee923a666d4cc54e48d55fc573c57492501122 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:32:50 -0500 Subject: [CPUFREQ] checkpatch cleanups for longrun Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longrun.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index 777a7ff075d..da5f70fcb76 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -11,12 +11,13 @@ #include #include #include +#include #include #include -#include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "longrun", msg) static struct cpufreq_driver longrun_driver; @@ -51,7 +52,7 @@ static void __init longrun_get_policy(struct cpufreq_policy *policy) msr_lo &= 0x0000007F; msr_hi &= 0x0000007F; - if ( longrun_high_freq <= longrun_low_freq ) { + if (longrun_high_freq <= longrun_low_freq) { /* Assume degenerate Longrun table */ policy->min = policy->max = longrun_high_freq; } else { @@ -79,7 +80,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy) if (!policy) return -EINVAL; - if ( longrun_high_freq <= longrun_low_freq ) { + if (longrun_high_freq <= longrun_low_freq) { /* Assume degenerate Longrun table */ pctg_lo = pctg_hi = 100; } else { @@ -152,7 +153,7 @@ static unsigned int longrun_get(unsigned int cpu) cpuid(0x80860007, &eax, &ebx, &ecx, &edx); dprintk("cpuid eax is %u\n", eax); - return (eax * 1000); + return eax * 1000; } /** @@ -196,7 +197,8 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); *high_freq = msr_lo * 1000; /* to kHz */ - dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); + dprintk("longrun table interface told %u - %u kHz\n", + *low_freq, *high_freq); if (*low_freq > *high_freq) *low_freq = *high_freq; @@ -219,7 +221,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, cpuid(0x80860007, &eax, &ebx, &ecx, &edx); /* try decreasing in 10% steps, some processors react only * on some barrier values */ - for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { + for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) { /* set to 0 to try_hi perf_pctg */ msr_lo &= 0xFFFFFF80; msr_hi &= 0xFFFFFF80; @@ -236,7 +238,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) * eqals - * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) + * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) * * high_freq * perf_pctg is stored tempoarily into "ebx". */ @@ -317,9 +319,10 @@ static void __exit longrun_exit(void) } -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and " + "Efficeon processors."); +MODULE_LICENSE("GPL"); module_init(longrun_init); module_exit(longrun_exit); -- cgit v1.2.3-70-g09d2 From 14a6650f13b958aabc30ddd575b0902384b22457 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 18 Jan 2009 00:00:04 -0500 Subject: [CPUFREQ] checkpatch cleanups for powernow-k6 Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 44 ++++++++++++++++++------------- 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index c1ac5790c63..f10dea409f4 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -1,6 +1,7 @@ /* * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, + * Dominik Brodowski. * * Licensed under the terms of the GNU GPL License version 2. * @@ -13,14 +14,15 @@ #include #include #include - -#include #include #include +#include + #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long as it is unused */ +#define PFX "powernow-k6: " static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -47,8 +49,8 @@ static struct cpufreq_frequency_table clock_ratio[] = { */ static int powernow_k6_get_cpu_multiplier(void) { - u64 invalue = 0; - u32 msrval; + u64 invalue = 0; + u32 msrval; msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ @@ -68,12 +70,12 @@ static int powernow_k6_get_cpu_multiplier(void) */ static void powernow_k6_set_state(unsigned int best_i) { - unsigned long outvalue = 0, invalue = 0; - unsigned long msrval; - struct cpufreq_freqs freqs; + unsigned long outvalue = 0, invalue = 0; + unsigned long msrval; + struct cpufreq_freqs freqs; if (clock_ratio[best_i].index > max_multiplier) { - printk(KERN_ERR "cpufreq: invalid target frequency\n"); + printk(KERN_ERR PFX "invalid target frequency\n"); return; } @@ -119,7 +121,8 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) * powernow_k6_setpolicy - sets a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * sets a new CPUFreq policy */ @@ -127,9 +130,10 @@ static int powernow_k6_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0; - if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &clock_ratio[0], + target_freq, relation, &newstate)) return -EINVAL; powernow_k6_set_state(newstate); @@ -140,7 +144,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy, static int powernow_k6_cpu_init(struct cpufreq_policy *policy) { - unsigned int i; + unsigned int i, f; int result; if (policy->cpu != 0) @@ -152,10 +156,11 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) /* table init */ for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { - if (clock_ratio[i].index > max_multiplier) + f = clock_ratio[i].index; + if (f > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else - clock_ratio[i].frequency = busfreq * clock_ratio[i].index; + clock_ratio[i].frequency = busfreq * f; } /* cpuinfo and default policy values */ @@ -185,7 +190,9 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) static unsigned int powernow_k6_get(unsigned int cpu) { - return busfreq * powernow_k6_get_cpu_multiplier(); + unsigned int ret; + ret = (busfreq * powernow_k6_get_cpu_multiplier()); + return ret; } static struct freq_attr *powernow_k6_attr[] = { @@ -221,7 +228,7 @@ static int __init powernow_k6_init(void) return -ENODEV; if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { - printk("cpufreq: PowerNow IOPORT region already used.\n"); + printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n"); return -EIO; } @@ -246,7 +253,8 @@ static void __exit powernow_k6_exit(void) } -MODULE_AUTHOR("Arjan van de Ven, Dave Jones , Dominik Brodowski "); +MODULE_AUTHOR("Arjan van de Ven, Dave Jones , " + "Dominik Brodowski "); MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 6072ace436800a011c3cb9a75ee49276bd90445a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 18 Jan 2009 01:27:35 -0500 Subject: [CPUFREQ] checkpatch cleanups for sc520 Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index 42da9bd677d..435a996a613 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -19,17 +19,19 @@ #include #include +#include +#include #include -#include -#include #define MMCR_BASE 0xfffef000 /* The default base address */ #define OFFS_CPUCTL 0x2 /* CPU Control Register */ static __u8 __iomem *cpuctl; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "sc520_freq", msg) +#define PFX "sc520_freq: " static struct cpufreq_frequency_table sc520_freq_table[] = { {0x01, 100000}, @@ -43,7 +45,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) switch (clockspeed_reg & 0x03) { default: - printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + printk(KERN_ERR PFX "error: cpuctl register has unexpected " + "value %02x\n", clockspeed_reg); case 0x01: return 100000; case 0x02: @@ -51,7 +54,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) } } -static void sc520_freq_set_cpu_state (unsigned int state) +static void sc520_freq_set_cpu_state(unsigned int state) { struct cpufreq_freqs freqs; @@ -76,18 +79,19 @@ static void sc520_freq_set_cpu_state (unsigned int state) cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); }; -static int sc520_freq_verify (struct cpufreq_policy *policy) +static int sc520_freq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); } -static int sc520_freq_target (struct cpufreq_policy *policy, +static int sc520_freq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; - if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, sc520_freq_table, + target_freq, relation, &newstate)) return -EINVAL; sc520_freq_set_cpu_state(newstate); @@ -116,7 +120,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); @@ -131,7 +135,7 @@ static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) } -static struct freq_attr* sc520_freq_attr[] = { +static struct freq_attr *sc520_freq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -155,13 +159,13 @@ static int __init sc520_freq_init(void) int err; /* Test if we have the right hardware */ - if(c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) { + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { dprintk("no Elan SC520 processor found!\n"); return -ENODEV; } cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); - if(!cpuctl) { + if (!cpuctl) { printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From bbfebd66554b934b270c4c49442f4fe5e62df0e5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 23:55:22 -0500 Subject: [CPUFREQ] checkpatch cleanups for speedstep related drivers. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 60 +++++----- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 70 ++++++------ arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 129 ++++++++++++--------- arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 18 +-- arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 166 +++++++++++++++++----------- 5 files changed, 258 insertions(+), 185 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b585e04cbc9..46a2a7a5314 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -27,15 +27,16 @@ #include #include #include +#include #include #include -#include #include "speedstep-lib.h" #define PFX "p4-clockmod: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "p4-clockmod", msg) /* * Duty Cycle (3bits), note DC_DISABLE is not specified in @@ -58,7 +59,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) { u32 l, h; - if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) + if (!cpu_online(cpu) || + (newstate > DC_DISABLE) || (newstate == DC_RESV)) return -EINVAL; rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); @@ -66,7 +68,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) if (l & 0x01) dprintk("CPU#%d currently thermal throttled\n", cpu); - if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) + if (has_N44_O17_errata[cpu] && + (newstate == DC_25PT || newstate == DC_DFLT)) newstate = DC_38PT; rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); @@ -112,7 +115,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int i; - if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], + target_freq, relation, &newstate)) return -EINVAL; freqs.old = cpufreq_p4_get(policy->cpu); @@ -127,7 +131,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } - /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software + /* run on each logical CPU, + * see section 13.15.3 of IA32 Intel Architecture Software * Developer's Manual, Volume 3 */ for_each_cpu(i, policy->cpus) @@ -153,28 +158,30 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) { if (c->x86 == 0x06) { if (cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " - "The acpi-cpufreq module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); + printk(KERN_WARNING PFX "Warning: EST-capable CPU " + "detected. The acpi-cpufreq module offers " + "voltage scaling in addition of frequency " + "scaling. You should use that instead of " + "p4-clockmod, if possible.\n"); switch (c->x86_model) { case 0x0E: /* Core */ case 0x0F: /* Core Duo */ case 0x16: /* Celeron Core */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); + return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); case 0x0D: /* Pentium M (Dothan) */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; /* fall through */ case 0x09: /* Pentium M (Banias) */ - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + return speedstep_get_frequency(SPEEDSTEP_CPU_PM); } } if (c->x86 != 0xF) { if (!cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " - "Please send an e-mail to \n"); + printk(KERN_WARNING PFX "Unknown CPU. " + "Please send an e-mail to " + "\n"); return 0; } @@ -182,16 +189,16 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) * throttling is active or not. */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { + if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) { printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " "The speedstep-ich or acpi cpufreq modules offer " "voltage scaling in addition of frequency scaling. " "You should use either one instead of p4-clockmod, " "if possible.\n"); - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); + return speedstep_get_frequency(SPEEDSTEP_CPU_P4M); } - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); + return speedstep_get_frequency(SPEEDSTEP_CPU_P4D); } @@ -223,8 +230,8 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) return -EINVAL; /* table init */ - for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if ((i<2) && (has_N44_O17_errata[policy->cpu])) + for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if ((i < 2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; @@ -258,12 +265,12 @@ static unsigned int cpufreq_p4_get(unsigned int cpu) l = DC_DISABLE; if (l != DC_DISABLE) - return (stock_freq * l / 8); + return stock_freq * l / 8; return stock_freq; } -static struct freq_attr* p4clockmod_attr[] = { +static struct freq_attr *p4clockmod_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -299,9 +306,10 @@ static int __init cpufreq_p4_init(void) ret = cpufreq_register_driver(&p4clockmod_driver); if (!ret) - printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); + printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock " + "Modulation available\n"); - return (ret); + return ret; } @@ -311,9 +319,9 @@ static void __exit cpufreq_p4_exit(void) } -MODULE_AUTHOR ("Zwane Mwaikambo "); -MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Zwane Mwaikambo "); +MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); +MODULE_LICENSE("GPL"); late_initcall(cpufreq_p4_init); module_exit(cpufreq_p4_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index dedc1e98f16..8bbb11adb31 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor = 0; +static unsigned int speedstep_processor; static u32 pmbase; @@ -54,7 +54,8 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { }; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "speedstep-ich", msg) /** @@ -62,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { * * Returns: -ENODEV if no register could be found */ -static int speedstep_find_register (void) +static int speedstep_find_register(void) { if (!speedstep_chipset_dev) return -ENODEV; @@ -90,7 +91,7 @@ static int speedstep_find_register (void) * * Tries to change the SpeedStep state. */ -static void speedstep_set_state (unsigned int state) +static void speedstep_set_state(unsigned int state) { u8 pm2_blk; u8 value; @@ -133,11 +134,11 @@ static void speedstep_set_state (unsigned int state) dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - if (state == (value & 0x1)) { - dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); - } else { - printk (KERN_ERR "cpufreq: change failed - I/O error\n"); - } + if (state == (value & 0x1)) + dprintk("change to %u MHz succeeded\n", + speedstep_get_frequency(speedstep_processor) / 1000); + else + printk(KERN_ERR "cpufreq: change failed - I/O error\n"); return; } @@ -149,7 +150,7 @@ static void speedstep_set_state (unsigned int state) * Tries to activate the SpeedStep status and control registers. * Returns -EINVAL on an unsupported chipset, and zero on success. */ -static int speedstep_activate (void) +static int speedstep_activate(void) { u16 value = 0; @@ -175,20 +176,18 @@ static int speedstep_activate (void) * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected * chipset, or zero on failure. */ -static unsigned int speedstep_detect_chipset (void) +static unsigned int speedstep_detect_chipset(void) { speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (speedstep_chipset_dev) return 4; /* 4-M */ speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (speedstep_chipset_dev) return 3; /* 3-M */ @@ -196,8 +195,7 @@ static unsigned int speedstep_detect_chipset (void) speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_10, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (speedstep_chipset_dev) { /* speedstep.c causes lockups on Dell Inspirons 8000 and @@ -208,8 +206,7 @@ static unsigned int speedstep_detect_chipset (void) hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82815_MC, - PCI_ANY_ID, - PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (!hostbridge) @@ -236,7 +233,7 @@ static unsigned int _speedstep_get(const struct cpumask *cpus) cpus_allowed = current->cpus_allowed; set_cpus_allowed_ptr(current, cpus); - speed = speedstep_get_processor_frequency(speedstep_processor); + speed = speedstep_get_frequency(speedstep_processor); set_cpus_allowed_ptr(current, &cpus_allowed); dprintk("detected %u kHz as current frequency\n", speed); return speed; @@ -251,11 +248,12 @@ static unsigned int speedstep_get(unsigned int cpu) * speedstep_target - set a new CPUFreq policy * @policy: new policy * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * @relation: how that frequency relates to achieved frequency + * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) * * Sets a new CPUFreq policy. */ -static int speedstep_target (struct cpufreq_policy *policy, +static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -264,7 +262,8 @@ static int speedstep_target (struct cpufreq_policy *policy, cpumask_t cpus_allowed; int i; - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], + target_freq, relation, &newstate)) return -EINVAL; freqs.old = _speedstep_get(policy->cpus); @@ -308,7 +307,7 @@ static int speedstep_target (struct cpufreq_policy *policy, * Limit must be within speedstep_low_freq and speedstep_high_freq, with * at least one border included. */ -static int speedstep_verify (struct cpufreq_policy *policy) +static int speedstep_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } @@ -344,7 +343,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) return -EIO; dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) + ? "low" : "high", (speed / 1000)); /* cpuinfo and default policy values */ @@ -352,9 +352,9 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); if (result) - return (result); + return result; - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); return 0; } @@ -366,7 +366,7 @@ static int speedstep_cpu_exit(struct cpufreq_policy *policy) return 0; } -static struct freq_attr* speedstep_attr[] = { +static struct freq_attr *speedstep_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -396,13 +396,15 @@ static int __init speedstep_init(void) /* detect processor */ speedstep_processor = speedstep_detect_processor(); if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); + dprintk("Intel(R) SpeedStep(TM) capable processor " + "not found\n"); return -ENODEV; } /* detect chipset */ if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); + dprintk("Intel(R) SpeedStep(TM) for this chipset not " + "(yet) available.\n"); return -ENODEV; } @@ -431,9 +433,11 @@ static void __exit speedstep_exit(void) } -MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dave Jones , " + "Dominik Brodowski "); +MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets " + "with ICH-M southbridges."); +MODULE_LICENSE("GPL"); module_init(speedstep_init); module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index cdac7d62369..55c696daa05 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -18,10 +18,13 @@ #include #include "speedstep-lib.h" -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "speedstep-lib", msg) + +#define PFX "speedstep-lib: " #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -static int relaxed_check = 0; +static int relaxed_check; #else #define relaxed_check 0 #endif @@ -30,14 +33,14 @@ static int relaxed_check = 0; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency (unsigned int processor) +static unsigned int pentium3_get_frequency(unsigned int processor) { - /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ + /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { unsigned int ratio; /* Frequency Multiplier (x10) */ u8 bitmap; /* power on configuration bits [27, 25:22] (in MSR 0x2a) */ - } msr_decode_mult [] = { + } msr_decode_mult[] = { { 30, 0x01 }, { 35, 0x05 }, { 40, 0x02 }, @@ -52,7 +55,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) { 85, 0x26 }, { 90, 0x20 }, { 100, 0x2b }, - { 0, 0xff } /* error or unknown value */ + { 0, 0xff } /* error or unknown value */ }; /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ @@ -60,7 +63,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) unsigned int value; /* Front Side Bus speed in MHz */ u8 bitmap; /* power on configuration bits [18: 19] (in MSR 0x2a) */ - } msr_decode_fsb [] = { + } msr_decode_fsb[] = { { 66, 0x0 }, { 100, 0x2 }, { 133, 0x1 }, @@ -85,7 +88,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) } /* decode the multiplier */ - if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { + if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) { dprintk("workaround for early PIIIs\n"); msr_lo &= 0x03c00000; } else @@ -97,9 +100,10 @@ static unsigned int pentium3_get_frequency (unsigned int processor) j++; } - dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); + dprintk("speed is %u\n", + (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); - return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); + return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100; } @@ -112,20 +116,23 @@ static unsigned int pentiumM_get_frequency(void) /* see table B-2 of 24547212.pdf */ if (msr_lo & 0x00040000) { - printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); + printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n", + msr_lo, msr_tmp); return 0; } msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); + dprintk("bits 22-26 are 0x%x, speed is %u\n", + msr_tmp, (msr_tmp * 100 * 1000)); - return (msr_tmp * 100 * 1000); + return msr_tmp * 100 * 1000; } static unsigned int pentium_core_get_frequency(void) { u32 fsb = 0; u32 msr_lo, msr_tmp; + int ret; rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); /* see table B-2 of 25366920.pdf */ @@ -153,12 +160,15 @@ static unsigned int pentium_core_get_frequency(void) } rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", + msr_lo, msr_tmp); msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); + dprintk("bits 22-26 are 0x%x, speed is %u\n", + msr_tmp, (msr_tmp * fsb)); - return (msr_tmp * fsb); + ret = (msr_tmp * fsb); + return ret; } @@ -167,6 +177,7 @@ static unsigned int pentium4_get_frequency(void) struct cpuinfo_x86 *c = &boot_cpu_data; u32 msr_lo, msr_hi, mult; unsigned int fsb = 0; + unsigned int ret; rdmsr(0x2c, msr_lo, msr_hi); @@ -195,44 +206,47 @@ static unsigned int pentium4_get_frequency(void) } if (!fsb) - printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to \n"); + printk(KERN_DEBUG PFX "couldn't detect FSB speed. " + "Please send an e-mail to \n"); /* Multiplier. */ mult = msr_lo >> 24; - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); + dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", + fsb, mult, (fsb * mult)); - return (fsb * mult); + ret = (fsb * mult); + return ret; } -unsigned int speedstep_get_processor_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(unsigned int processor) { switch (processor) { - case SPEEDSTEP_PROCESSOR_PCORE: + case SPEEDSTEP_CPU_PCORE: return pentium_core_get_frequency(); - case SPEEDSTEP_PROCESSOR_PM: + case SPEEDSTEP_CPU_PM: return pentiumM_get_frequency(); - case SPEEDSTEP_PROCESSOR_P4D: - case SPEEDSTEP_PROCESSOR_P4M: + case SPEEDSTEP_CPU_P4D: + case SPEEDSTEP_CPU_P4M: return pentium4_get_frequency(); - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + case SPEEDSTEP_CPU_PIII_T: + case SPEEDSTEP_CPU_PIII_C: + case SPEEDSTEP_CPU_PIII_C_EARLY: return pentium3_get_frequency(processor); default: return 0; }; return 0; } -EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); +EXPORT_SYMBOL_GPL(speedstep_get_frequency); /********************************************************************* * DETECT SPEEDSTEP-CAPABLE PROCESSOR * *********************************************************************/ -unsigned int speedstep_detect_processor (void) +unsigned int speedstep_detect_processor(void) { struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; @@ -261,7 +275,7 @@ unsigned int speedstep_detect_processor (void) * sample has ebx = 0x0f, production has 0x0e. */ if ((ebx == 0x0e) || (ebx == 0x0f)) - return SPEEDSTEP_PROCESSOR_P4M; + return SPEEDSTEP_CPU_P4M; break; case 7: /* @@ -272,7 +286,7 @@ unsigned int speedstep_detect_processor (void) * samples are only of B-stepping... */ if (ebx == 0x0e) - return SPEEDSTEP_PROCESSOR_P4M; + return SPEEDSTEP_CPU_P4M; break; case 9: /* @@ -288,10 +302,13 @@ unsigned int speedstep_detect_processor (void) * M-P4-Ms may have either ebx=0xe or 0xf [see above] * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] * also, M-P4M HTs have ebx=0x8, too - * For now, they are distinguished by the model_id string + * For now, they are distinguished by the model_id + * string */ - if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) - return SPEEDSTEP_PROCESSOR_P4M; + if ((ebx == 0x0e) || + (strstr(c->x86_model_id, + "Mobile Intel(R) Pentium(R) 4") != NULL)) + return SPEEDSTEP_CPU_P4M; break; default: break; @@ -301,7 +318,8 @@ unsigned int speedstep_detect_processor (void) switch (c->x86_model) { case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, + * 0x06 for mobile PIII-M */ ebx = cpuid_ebx(0x00000001); dprintk("ebx is %x\n", ebx); @@ -313,14 +331,15 @@ unsigned int speedstep_detect_processor (void) /* So far all PIII-M processors support SpeedStep. See * Intel's 24540640.pdf of June 2003 */ - return SPEEDSTEP_PROCESSOR_PIII_T; + return SPEEDSTEP_CPU_PIII_T; case 0x08: /* Intel PIII [Coppermine] */ /* all mobile PIII Coppermines have FSB 100 MHz * ==> sort out a few desktop PIIIs. */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", + msr_lo, msr_hi); msr_lo &= 0x00c0000; if (msr_lo != 0x0080000) return 0; @@ -332,13 +351,15 @@ unsigned int speedstep_detect_processor (void) * bit 56 or 57 is set */ rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); - if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { + dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", + msr_lo, msr_hi); + if ((msr_hi & (1<<18)) && + (relaxed_check ? 1 : (msr_hi & (3<<24)))) { if (c->x86_mask == 0x01) { dprintk("early PIII version\n"); - return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; + return SPEEDSTEP_CPU_PIII_C_EARLY; } else - return SPEEDSTEP_PROCESSOR_PIII_C; + return SPEEDSTEP_CPU_PIII_C; } default: @@ -369,7 +390,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, dprintk("trying to determine both speeds\n"); /* get current speed */ - prev_speed = speedstep_get_processor_frequency(processor); + prev_speed = speedstep_get_frequency(processor); if (!prev_speed) return -EIO; @@ -379,7 +400,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, /* switch to low state */ set_state(SPEEDSTEP_LOW); - *low_speed = speedstep_get_processor_frequency(processor); + *low_speed = speedstep_get_frequency(processor); if (!*low_speed) { ret = -EIO; goto out; @@ -398,7 +419,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (transition_latency) do_gettimeofday(&tv2); - *high_speed = speedstep_get_processor_frequency(processor); + *high_speed = speedstep_get_frequency(processor); if (!*high_speed) { ret = -EIO; goto out; @@ -426,9 +447,12 @@ unsigned int speedstep_get_freqs(unsigned int processor, /* check if the latency measurement is too high or too low * and set it to a safe value (500uSec) in that case */ - if (*transition_latency > 10000000 || *transition_latency < 50000) { - printk (KERN_WARNING "speedstep: frequency transition measured seems out of " - "range (%u nSec), falling back to a safe one of %u nSec.\n", + if (*transition_latency > 10000000 || + *transition_latency < 50000) { + printk(KERN_WARNING PFX "frequency transition " + "measured seems out of range (%u " + "nSec), falling back to a safe one of" + "%u nSec.\n", *transition_latency, 500000); *transition_latency = 500000; } @@ -436,15 +460,16 @@ unsigned int speedstep_get_freqs(unsigned int processor, out: local_irq_restore(flags); - return (ret); + return ret; } EXPORT_SYMBOL_GPL(speedstep_get_freqs); #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK module_param(relaxed_check, int, 0444); -MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); +MODULE_PARM_DESC(relaxed_check, + "Don't do all checks for speedstep capability."); #endif -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index b11bcc608ca..2b6c04e5a30 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -12,17 +12,17 @@ /* processors */ -#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ +#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ +#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ +#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using - * the speedstep_get_processor_frequency() call. */ -#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ + * the speedstep_get_frequency() call. */ +#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ +#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ +#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ /* speedstep states -- only two of them */ @@ -34,7 +34,7 @@ extern unsigned int speedstep_detect_processor (void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_processor_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(unsigned int processor); /* detect the low and high speeds of the processor. The callback diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 8a85c93bd62..befea088e4f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -19,8 +19,8 @@ #include #include #include +#include #include -#include #include "speedstep-lib.h" @@ -30,12 +30,12 @@ * If user gives it, these are used. * */ -static int smi_port = 0; -static int smi_cmd = 0; -static unsigned int smi_sig = 0; +static int smi_port; +static int smi_cmd; +static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor = 0; +static unsigned int speedstep_processor; /* * There are only two frequency states for each processor. Values @@ -56,12 +56,13 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { * of DMA activity going on? */ #define SMI_TRIES 5 -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "speedstep-smi", msg) /** * speedstep_smi_ownership */ -static int speedstep_smi_ownership (void) +static int speedstep_smi_ownership(void) { u32 command, result, magic, dummy; u32 function = GET_SPEEDSTEP_OWNER; @@ -70,16 +71,18 @@ static int speedstep_smi_ownership (void) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); magic = virt_to_phys(magic_data); - dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); + dprintk("trying to obtain ownership with command %x at port %x\n", + command, smi_port); __asm__ __volatile__( "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp\n" - : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), - "=S" (dummy) + : "=D" (result), + "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), + "=S" (dummy) : "a" (command), "b" (function), "c" (0), "d" (smi_port), - "D" (0), "S" (magic) + "D" (0), "S" (magic) : "memory" ); @@ -97,10 +100,10 @@ static int speedstep_smi_ownership (void) * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing * shows that the latter occurs if !(ist_info.event & 0xFFFF). */ -static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) +static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high) { u32 command, result = 0, edi, high_mhz, low_mhz, dummy; - u32 state=0; + u32 state = 0; u32 function = GET_SPEEDSTEP_FREQS; if (!(ist_info.event & 0xFFFF)) { @@ -110,17 +113,25 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); + dprintk("trying to determine frequencies with command %x at port %x\n", + command, smi_port); __asm__ __volatile__( "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp" - : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) + : "=a" (result), + "=b" (high_mhz), + "=c" (low_mhz), + "=d" (state), "=D" (edi), "=S" (dummy) + : "a" (command), + "b" (function), + "c" (state), + "d" (smi_port), "S" (0), "D" (0) ); - dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); + dprintk("result %x, low_freq %u, high_freq %u\n", + result, low_mhz, high_mhz); /* abort if results are obviously incorrect... */ if ((high_mhz + low_mhz) < 600) @@ -137,26 +148,30 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * */ -static int speedstep_get_state (void) +static int speedstep_get_state(void) { - u32 function=GET_SPEEDSTEP_STATE; + u32 function = GET_SPEEDSTEP_STATE; u32 result, state, edi, command, dummy; command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); + dprintk("trying to determine current setting with command %x " + "at port %x\n", command, smi_port); __asm__ __volatile__( "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp\n" - : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0) + : "=a" (result), + "=b" (state), "=D" (edi), + "=c" (dummy), "=d" (dummy), "=S" (dummy) + : "a" (command), "b" (function), "c" (0), + "d" (smi_port), "S" (0), "D" (0) ); dprintk("state is %x, result is %x\n", state, result); - return (state & 1); + return state & 1; } @@ -165,11 +180,11 @@ static int speedstep_get_state (void) * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) * */ -static void speedstep_set_state (unsigned int state) +static void speedstep_set_state(unsigned int state) { unsigned int result = 0, command, new_state, dummy; unsigned long flags; - unsigned int function=SET_SPEEDSTEP_STATE; + unsigned int function = SET_SPEEDSTEP_STATE; unsigned int retry = 0; if (state > 0x1) @@ -180,11 +195,14 @@ static void speedstep_set_state (unsigned int state) command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); + dprintk("trying to set frequency to state %u " + "with command %x at port %x\n", + state, command, smi_port); do { if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", retry, result); + dprintk("retry %u, previous result %u, waiting...\n", + retry, result); mdelay(retry * 50); } retry++; @@ -192,20 +210,26 @@ static void speedstep_set_state (unsigned int state) "push %%ebp\n" "out %%al, (%%dx)\n" "pop %%ebp" - : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy), - "=d" (dummy), "=S" (dummy) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) + : "=b" (new_state), "=D" (result), + "=c" (dummy), "=a" (dummy), + "=d" (dummy), "=S" (dummy) + : "a" (command), "b" (function), "c" (state), + "d" (smi_port), "S" (0), "D" (0) ); } while ((new_state != state) && (retry <= SMI_TRIES)); /* enable IRQs */ local_irq_restore(flags); - if (new_state == state) { - dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); - } else { - printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result); - } + if (new_state == state) + dprintk("change to %u MHz succeeded after %u tries " + "with result %u\n", + (speedstep_freqs[new_state].frequency / 1000), + retry, result); + else + printk(KERN_ERR "cpufreq: change to state %u " + "failed with new_state %u and result %u\n", + state, new_state, result); return; } @@ -219,13 +243,14 @@ static void speedstep_set_state (unsigned int state) * * Sets a new CPUFreq policy/freq. */ -static int speedstep_target (struct cpufreq_policy *policy, +static int speedstep_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { unsigned int newstate = 0; struct cpufreq_freqs freqs; - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], + target_freq, relation, &newstate)) return -EINVAL; freqs.old = speedstep_freqs[speedstep_get_state()].frequency; @@ -250,7 +275,7 @@ static int speedstep_target (struct cpufreq_policy *policy, * Limit must be within speedstep_low_freq and speedstep_high_freq, with * at least one border included. */ -static int speedstep_verify (struct cpufreq_policy *policy) +static int speedstep_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); } @@ -259,7 +284,8 @@ static int speedstep_verify (struct cpufreq_policy *policy) static int speedstep_cpu_init(struct cpufreq_policy *policy) { int result; - unsigned int speed,state; + unsigned int speed, state; + unsigned int *low, *high; /* capability check */ if (policy->cpu != 0) @@ -272,19 +298,23 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) } /* detect low and high frequency */ - result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency); + low = &speedstep_freqs[SPEEDSTEP_LOW].frequency; + high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency; + + result = speedstep_smi_get_freqs(low, high); if (result) { - /* fall back to speedstep_lib.c dection mechanism: try both states out */ - dprintk("could not detect low and high frequencies by SMI call.\n"); + /* fall back to speedstep_lib.c dection mechanism: + * try both states out */ + dprintk("could not detect low and high frequencies " + "by SMI call.\n"); result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + low, high, NULL, &speedstep_set_state); if (result) { - dprintk("could not detect two different speeds -- aborting.\n"); + dprintk("could not detect two different speeds" + " -- aborting.\n"); return result; } else dprintk("workaround worked.\n"); @@ -295,7 +325,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) speed = speedstep_freqs[state].frequency; dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) + ? "low" : "high", (speed / 1000)); /* cpuinfo and default policy values */ @@ -304,7 +335,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); @@ -321,7 +352,7 @@ static unsigned int speedstep_get(unsigned int cpu) { if (cpu) return -ENODEV; - return speedstep_get_processor_frequency(speedstep_processor); + return speedstep_get_frequency(speedstep_processor); } @@ -335,7 +366,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) return result; } -static struct freq_attr* speedstep_attr[] = { +static struct freq_attr *speedstep_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -364,21 +395,23 @@ static int __init speedstep_init(void) speedstep_processor = speedstep_detect_processor(); switch (speedstep_processor) { - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + case SPEEDSTEP_CPU_PIII_T: + case SPEEDSTEP_CPU_PIII_C: + case SPEEDSTEP_CPU_PIII_C_EARLY: break; default: speedstep_processor = 0; } if (!speedstep_processor) { - dprintk ("No supported Intel CPU detected.\n"); + dprintk("No supported Intel CPU detected.\n"); return -ENODEV; } - dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", - ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); + dprintk("signature:0x%.8lx, command:0x%.8lx, " + "event:0x%.8lx, perf_level:0x%.8lx.\n", + ist_info.signature, ist_info.command, + ist_info.event, ist_info.perf_level); /* Error if no IST-SMI BIOS or no PARM sig= 'ISGE' aka 'Intel Speedstep Gate E' */ @@ -416,17 +449,20 @@ static void __exit speedstep_exit(void) cpufreq_unregister_driver(&speedstep_driver); } -module_param(smi_port, int, 0444); -module_param(smi_cmd, int, 0444); -module_param(smi_sig, uint, 0444); +module_param(smi_port, int, 0444); +module_param(smi_cmd, int, 0444); +module_param(smi_sig, uint, 0444); -MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); -MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); -MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); +MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value " + "-- Intel's default setting is 0xb2"); +MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value " + "-- Intel's default setting is 0x82"); +MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the " + "SMI interface."); -MODULE_AUTHOR ("Hiroshi Miura"); -MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Hiroshi Miura"); +MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface."); +MODULE_LICENSE("GPL"); module_init(speedstep_init); module_exit(speedstep_exit); -- cgit v1.2.3-70-g09d2 From b9e7638a301b1245d4675087a05fa90fb4fa1845 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 18 Jan 2009 00:32:26 -0500 Subject: [CPUFREQ] checkpatch cleanups for powernow-k7 The asm/timer.h warning can be ignored, it's needed for recalibrate_cpu_khz() Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 237 +++++++++++++++++------------- 1 file changed, 137 insertions(+), 100 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9cd73d15743..3c28ccd4974 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -6,10 +6,12 @@ * Licensed under the terms of the GNU GPL License version 2. * Based upon datasheets & sample CPUs kindly provided by AMD. * - * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. - * - We cli/sti on stepping A0 CPUs around the FID/VID transition. - * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. - * - We disable half multipliers if ACPI is used on A0 stepping CPUs. + * Errata 5: + * CPU may fail to execute a FID/VID change in presence of interrupt. + * - We cli/sti on stepping A0 CPUs around the FID/VID transition. + * Errata 15: + * CPU with half frequency multipliers may hang upon wakeup from disconnect. + * - We disable half multipliers if ACPI is used on A0 stepping CPUs. */ #include @@ -20,11 +22,11 @@ #include #include #include +#include +#include +#include /* Needed for recalibrate_cpu_khz() */ #include -#include -#include -#include #include #ifdef CONFIG_X86_POWERNOW_K7_ACPI @@ -58,9 +60,9 @@ struct pst_s { union powernow_acpi_control_t { struct { unsigned long fid:5, - vid:5, - sgtc:20, - res1:2; + vid:5, + sgtc:20, + res1:2; } bits; unsigned long val; }; @@ -101,7 +103,8 @@ static unsigned int fsb; static unsigned int latency; static char have_a0; -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "powernow-k7", msg) static int check_fsb(unsigned int fsbspeed) { @@ -109,7 +112,7 @@ static int check_fsb(unsigned int fsbspeed) unsigned int f = fsb / 1000; delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; - return (delta < 5); + return delta < 5; } static int check_powernow(void) @@ -117,24 +120,26 @@ static int check_powernow(void) struct cpuinfo_x86 *c = &cpu_data(0); unsigned int maxei, eax, ebx, ecx, edx; - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) { #ifdef MODULE - printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); + printk(KERN_INFO PFX "This module only works with " + "AMD K7 CPUs\n"); #endif return 0; } /* Get maximum capabilities */ - maxei = cpuid_eax (0x80000000); + maxei = cpuid_eax(0x80000000); if (maxei < 0x80000007) { /* Any powernow info ? */ #ifdef MODULE - printk (KERN_INFO PFX "No powernow capabilities detected\n"); + printk(KERN_INFO PFX "No powernow capabilities detected\n"); #endif return 0; } if ((c->x86_model == 6) && (c->x86_mask == 0)) { - printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); + printk(KERN_INFO PFX "K7 660[A0] core detected, " + "enabling errata workarounds\n"); have_a0 = 1; } @@ -144,37 +149,42 @@ static int check_powernow(void) if (!(edx & (1 << 1 | 1 << 2))) return 0; - printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); + printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); if (edx & 1 << 1) { - printk ("frequency"); - can_scale_bus=1; + printk("frequency"); + can_scale_bus = 1; } if ((edx & (1 << 1 | 1 << 2)) == 0x6) - printk (" and "); + printk(" and "); if (edx & 1 << 2) { - printk ("voltage"); - can_scale_vid=1; + printk("voltage"); + can_scale_vid = 1; } - printk (".\n"); + printk(".\n"); return 1; } +static void invalidate_entry(unsigned int entry) +{ + powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; +} -static int get_ranges (unsigned char *pst) +static int get_ranges(unsigned char *pst) { unsigned int j; unsigned int speed; u8 fid, vid; - powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * + (number_scales + 1)), GFP_KERNEL); if (!powernow_table) return -ENOMEM; - for (j=0 ; j < number_scales; j++) { + for (j = 0 ; j < number_scales; j++) { fid = *pst++; powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; @@ -182,10 +192,10 @@ static int get_ranges (unsigned char *pst) speed = powernow_table[j].frequency; - if ((fid_codes[fid] % 10)==5) { + if ((fid_codes[fid] % 10) == 5) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (have_a0 == 1) - powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(j); #endif } @@ -197,7 +207,7 @@ static int get_ranges (unsigned char *pst) vid = *pst++; powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, @@ -214,13 +224,13 @@ static void change_FID(int fid) { union msr_fidvidctl fidvidctl; - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); if (fidvidctl.bits.FID != fid) { fidvidctl.bits.SGTC = latency; fidvidctl.bits.FID = fid; fidvidctl.bits.VIDC = 0; fidvidctl.bits.FIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); } } @@ -229,18 +239,18 @@ static void change_VID(int vid) { union msr_fidvidctl fidvidctl; - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); if (fidvidctl.bits.VID != vid) { fidvidctl.bits.SGTC = latency; fidvidctl.bits.VID = vid; fidvidctl.bits.FIDC = 0; fidvidctl.bits.VIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val); } } -static void change_speed (unsigned int index) +static void change_speed(unsigned int index) { u8 fid, vid; struct cpufreq_freqs freqs; @@ -257,7 +267,7 @@ static void change_speed (unsigned int index) freqs.cpu = 0; - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); cfid = fidvidstatus.bits.CFID; freqs.old = fsb * fid_codes[cfid] / 10; @@ -321,12 +331,14 @@ static int powernow_acpi_init(void) goto err1; } - if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (acpi_processor_perf->control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) { retval = -ENODEV; goto err2; } - if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (acpi_processor_perf->status_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) { retval = -ENODEV; goto err2; } @@ -338,7 +350,8 @@ static int powernow_acpi_init(void) goto err2; } - powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * + (number_scales + 1)), GFP_KERNEL); if (!powernow_table) { retval = -ENOMEM; goto err2; @@ -352,7 +365,7 @@ static int powernow_acpi_init(void) unsigned int speed, speed_mhz; pc.val = (unsigned long) state->control; - dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", i, (u32) state->core_frequency, (u32) state->power, @@ -381,12 +394,12 @@ static int powernow_acpi_init(void) if (speed % 1000 > 0) speed_mhz++; - if ((fid_codes[fid] % 10)==5) { + if ((fid_codes[fid] % 10) == 5) { if (have_a0 == 1) - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(i); } - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + dprintk(" FID: 0x%x (%d.%dx [%dMHz]) " "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, fid_codes[fid] % 10, speed_mhz, vid, mobile_vid_table[vid]/1000, @@ -422,7 +435,8 @@ err1: err05: kfree(acpi_processor_perf); err0: - printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); + printk(KERN_WARNING PFX "ACPI perflib can not be used on " + "this platform\n"); acpi_processor_perf = NULL; return retval; } @@ -435,7 +449,14 @@ static int powernow_acpi_init(void) } #endif -static int powernow_decode_bios (int maxfid, int startvid) +static void print_pst_entry(struct pst_s *pst, unsigned int j) +{ + dprintk("PST:%d (@%p)\n", j, pst); + dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); +} + +static int powernow_decode_bios(int maxfid, int startvid) { struct psb_s *psb; struct pst_s *pst; @@ -446,61 +467,67 @@ static int powernow_decode_bios (int maxfid, int startvid) etuple = cpuid_eax(0x80000001); - for (i=0xC0000; i < 0xffff0 ; i+=16) { + for (i = 0xC0000; i < 0xffff0 ; i += 16) { p = phys_to_virt(i); - if (memcmp(p, "AMDK7PNOW!", 10) == 0){ - dprintk ("Found PSB header at %p\n", p); + if (memcmp(p, "AMDK7PNOW!", 10) == 0) { + dprintk("Found PSB header at %p\n", p); psb = (struct psb_s *) p; - dprintk ("Table version: 0x%x\n", psb->tableversion); + dprintk("Table version: 0x%x\n", psb->tableversion); if (psb->tableversion != 0x12) { - printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); + printk(KERN_INFO PFX "Sorry, only v1.2 tables" + " supported right now\n"); return -ENODEV; } - dprintk ("Flags: 0x%x\n", psb->flags); - if ((psb->flags & 1)==0) { - dprintk ("Mobile voltage regulator\n"); - } else { - dprintk ("Desktop voltage regulator\n"); - } + dprintk("Flags: 0x%x\n", psb->flags); + if ((psb->flags & 1) == 0) + dprintk("Mobile voltage regulator\n"); + else + dprintk("Desktop voltage regulator\n"); latency = psb->settlingtime; if (latency < 100) { - printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. " - "Should be at least 100. Correcting.\n", latency); + printk(KERN_INFO PFX "BIOS set settling time " + "to %d microseconds. " + "Should be at least 100. " + "Correcting.\n", latency); latency = 100; } - dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); - dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); + dprintk("Settling Time: %d microseconds.\n", + psb->settlingtime); + dprintk("Has %d PST tables. (Only dumping ones " + "relevant to this CPU).\n", + psb->numpst); - p += sizeof (struct psb_s); + p += sizeof(struct psb_s); pst = (struct pst_s *) p; - for (j=0; jnumpst; j++) { + for (j = 0; j < psb->numpst; j++) { pst = (struct pst_s *) p; number_scales = pst->numpstates; - if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && - (maxfid==pst->maxfid) && (startvid==pst->startvid)) - { - dprintk ("PST:%d (@%p)\n", j, pst); - dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", - pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); - - ret = get_ranges ((char *) pst + sizeof (struct pst_s)); + if ((etuple == pst->cpuid) && + check_fsb(pst->fsbspeed) && + (maxfid == pst->maxfid) && + (startvid == pst->startvid)) { + print_pst_entry(pst, j); + p = (char *)pst + sizeof(struct pst_s); + ret = get_ranges(p); return ret; } else { unsigned int k; - p = (char *) pst + sizeof (struct pst_s); - for (k=0; kident); - printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); - printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); + printk(KERN_WARNING PFX + "%s laptop with broken PST tables in BIOS detected.\n", + d->ident); + printk(KERN_WARNING PFX + "You need to downgrade to 3A21 (09/09/2002), or try a newer " + "BIOS than 3A71 (01/20/2003)\n"); + printk(KERN_WARNING PFX + "cpufreq scaling has been disabled as a result of this.\n"); return 0; } @@ -598,7 +631,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { { } }; -static int __init powernow_cpu_init (struct cpufreq_policy *policy) +static int __init powernow_cpu_init(struct cpufreq_policy *policy) { union msr_fidvidstatus fidvidstatus; int result; @@ -606,7 +639,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) if (policy->cpu != 0) return -ENODEV; - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val); recalibrate_cpu_khz(); @@ -618,19 +651,21 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) dprintk("FSB: %3dMHz\n", fsb/1000); if (dmi_check_system(powernow_dmi_table) || acpi_force) { - printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); + printk(KERN_INFO PFX "PSB/PST known to be broken. " + "Trying ACPI instead\n"); result = powernow_acpi_init(); } else { - result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + result = powernow_decode_bios(fidvidstatus.bits.MFID, + fidvidstatus.bits.SVID); if (result) { - printk (KERN_INFO PFX "Trying ACPI perflib\n"); + printk(KERN_INFO PFX "Trying ACPI perflib\n"); maximum_speed = 0; minimum_speed = -1; latency = 0; result = powernow_acpi_init(); if (result) { - printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); - printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); + printk(KERN_INFO PFX + "ACPI and legacy methods failed\n"); } } else { /* SGTC use the bus clock as timer */ @@ -642,10 +677,11 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) if (result) return result; - printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", + printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", minimum_speed/1000, maximum_speed/1000); - policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); + policy->cpuinfo.transition_latency = + cpufreq_scale(2000000UL, fsb, latency); policy->cur = powernow_get(0); @@ -654,7 +690,8 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) return cpufreq_frequency_table_cpuinfo(policy, powernow_table); } -static int powernow_cpu_exit (struct cpufreq_policy *policy) { +static int powernow_cpu_exit(struct cpufreq_policy *policy) +{ cpufreq_frequency_table_put_attr(policy->cpu); #ifdef CONFIG_X86_POWERNOW_K7_ACPI @@ -669,7 +706,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { return 0; } -static struct freq_attr* powernow_table_attr[] = { +static struct freq_attr *powernow_table_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -685,15 +722,15 @@ static struct cpufreq_driver powernow_driver = { .attr = powernow_table_attr, }; -static int __init powernow_init (void) +static int __init powernow_init(void) { - if (check_powernow()==0) + if (check_powernow() == 0) return -ENODEV; return cpufreq_register_driver(&powernow_driver); } -static void __exit powernow_exit (void) +static void __exit powernow_exit(void) { cpufreq_unregister_driver(&powernow_driver); } @@ -701,9 +738,9 @@ static void __exit powernow_exit (void) module_param(acpi_force, int, 0444); MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Dave Jones "); +MODULE_DESCRIPTION("Powernow driver for AMD K7 processors."); +MODULE_LICENSE("GPL"); late_initcall(powernow_init); module_exit(powernow_exit); -- cgit v1.2.3-70-g09d2 From 0e64a0c982c06a6b8f5e2a7f29eb108fdf257b2f Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 4 Feb 2009 14:37:50 -0500 Subject: [CPUFREQ] checkpatch cleanups for powernow-k8 This driver has so many long function names, and deep nested if's The remaining warnings will need some code restructuring to clean up. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 351 +++++++++++++++++++----------- 1 file changed, 226 insertions(+), 125 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6428aa17b40..4dd7e3bdee2 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -33,10 +33,10 @@ #include #include #include /* for current / set_cpus_allowed() */ +#include +#include #include -#include -#include #ifdef CONFIG_X86_POWERNOW_K8_ACPI #include @@ -71,7 +71,8 @@ static u32 find_khz_freq_from_fid(u32 fid) return 1000 * find_freq_from_fid(fid); } -static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, + u32 pstate) { return data[pstate].frequency; } @@ -186,7 +187,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) return 1; } - lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + lo = fid; + lo |= (data->currvid << MSR_C_LO_VID_SHIFT); + lo |= MSR_C_LO_INIT_FID_VID; dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); @@ -194,7 +197,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) do { wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); if (i++ > 100) { - printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); + printk(KERN_ERR PFX + "Hardware error - pending bit very stuck - " + "no further pstate changes possible\n"); return 1; } } while (query_current_values_with_pending_wait(data)); @@ -202,14 +207,16 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) count_off_irt(data); if (savevid != data->currvid) { - printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", - savevid, data->currvid); + printk(KERN_ERR PFX + "vid change on fid trans, old 0x%x, new 0x%x\n", + savevid, data->currvid); return 1; } if (fid != data->currfid) { - printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, - data->currfid); + printk(KERN_ERR PFX + "fid trans failed, fid 0x%x, curr 0x%x\n", fid, + data->currfid); return 1; } @@ -228,7 +235,9 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) return 1; } - lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + lo = data->currfid; + lo |= (vid << MSR_C_LO_VID_SHIFT); + lo |= MSR_C_LO_INIT_FID_VID; dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); @@ -236,20 +245,24 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) do { wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + printk(KERN_ERR PFX "internal error - pending bit " + "very stuck - no further pstate " + "changes possible\n"); return 1; } } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { - printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", + printk(KERN_ERR PFX "fid changed on vid trans, old " + "0x%x new 0x%x\n", savefid, data->currfid); return 1; } if (vid != data->currvid) { - printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, - data->currvid); + printk(KERN_ERR PFX "vid trans failed, vid 0x%x, " + "curr 0x%x\n", + vid, data->currvid); return 1; } @@ -261,7 +274,8 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) * Decreasing vid codes represent increasing voltages: * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. */ -static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) +static int decrease_vid_code_by_step(struct powernow_k8_data *data, + u32 reqvid, u32 step) { if ((data->currvid - reqvid) > step) reqvid = data->currvid - step; @@ -283,7 +297,8 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate) } /* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ -static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) +static int transition_fid_vid(struct powernow_k8_data *data, + u32 reqfid, u32 reqvid) { if (core_voltage_pre_transition(data, reqvid)) return 1; @@ -298,7 +313,8 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req return 1; if ((reqfid != data->currfid) || (reqvid != data->currvid)) { - printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", + printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, " + "curr 0x%x 0x%x\n", smp_processor_id(), reqfid, reqvid, data->currfid, data->currvid); return 1; @@ -311,13 +327,15 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req } /* Phase 1 - core voltage transition ... setup voltage */ -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) +static int core_voltage_pre_transition(struct powernow_k8_data *data, + u32 reqvid) { u32 rvosteps = data->rvo; u32 savefid = data->currfid; u32 maxvid, lo; - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", + dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " + "reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); @@ -340,7 +358,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid } else { dprintk("ph1: changing vid for rvo, req 0x%x\n", data->currvid - 1); - if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) + if (decrease_vid_code_by_step(data, data->currvid-1, 1)) return 1; rvosteps--; } @@ -350,7 +368,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid return 1; if (savefid != data->currfid) { - printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); + printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", + data->currfid); return 1; } @@ -363,20 +382,24 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid /* Phase 2 - core frequency transition */ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) { - u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; + u32 vcoreqfid, vcocurrfid, vcofiddiff; + u32 fid_interval, savevid = data->currvid; - if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", - reqfid, data->currfid); + if ((reqfid < HI_FID_TABLE_BOTTOM) && + (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX "ph2: illegal lo-lo transition " + "0x%x 0x%x\n", reqfid, data->currfid); return 1; } if (data->currfid == reqfid) { - printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); + printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", + data->currfid); return 0; } - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", + dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, " + "reqfid 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqfid); @@ -390,14 +413,14 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) if (reqfid > data->currfid) { if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + fid_interval)) { + if (write_new_fid(data, + data->currfid + fid_interval)) return 1; - } } else { if (write_new_fid - (data, 2 + convert_fid_to_vco_fid(data->currfid))) { + (data, + 2 + convert_fid_to_vco_fid(data->currfid))) return 1; - } } } else { if (write_new_fid(data, data->currfid - fid_interval)) @@ -417,7 +440,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) if (data->currfid != reqfid) { printk(KERN_ERR PFX - "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", + "ph2: mismatch, failed fid transition, " + "curr 0x%x, req 0x%x\n", data->currfid, reqfid); return 1; } @@ -435,7 +459,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) } /* Phase 3 - core voltage transition flow ... jump to the final vid. */ -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) +static int core_voltage_post_transition(struct powernow_k8_data *data, + u32 reqvid) { u32 savefid = data->currfid; u32 savereqvid = reqvid; @@ -457,7 +482,8 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi if (data->currvid != reqvid) { printk(KERN_ERR PFX - "ph3: failed vid transition\n, req 0x%x, curr 0x%x", + "ph3: failed vid transition\n, " + "req 0x%x, curr 0x%x", reqvid, data->currvid); return 1; } @@ -508,7 +534,8 @@ static int check_supported_cpu(unsigned int cpu) if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { - printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); + printk(KERN_INFO PFX + "Processor cpuid %x not supported\n", eax); goto out; } @@ -520,8 +547,10 @@ static int check_supported_cpu(unsigned int cpu) } cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - printk(KERN_INFO PFX "Power state transitions not supported\n"); + if ((edx & P_STATE_TRANSITION_CAPABLE) + != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX + "Power state transitions not supported\n"); goto out; } } else { /* must be a HW Pstate capable processor */ @@ -539,7 +568,8 @@ out: return rc; } -static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, + u8 maxvid) { unsigned int j; u8 lastfid = 0xff; @@ -550,12 +580,14 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 j, pst[j].vid); return -EINVAL; } - if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ + if (pst[j].vid < data->rvo) { + /* vid + rvo >= 0 */ printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" " %d\n", j); return -ENODEV; } - if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ + if (pst[j].vid < maxvid + data->rvo) { + /* vid + rvo >= maxvid */ printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" " %d\n", j); return -ENODEV; @@ -579,23 +611,31 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 return -EINVAL; } if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n"); + printk(KERN_INFO FW_BUG PFX + "first fid not from lo freq table\n"); return 0; } +static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) +{ + data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; +} + static void print_basics(struct powernow_k8_data *data) { int j; for (j = 0; j < data->numps; j++) { - if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { + if (data->powernow_table[j].frequency != + CPUFREQ_ENTRY_INVALID) { if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n", - j, + printk(KERN_INFO PFX + " %d : pstate %d (%d MHz)\n", j, data->powernow_table[j].index, data->powernow_table[j].frequency/1000); } else { - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + printk(KERN_INFO PFX + " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, data->powernow_table[j].index & 0xff, data->powernow_table[j].frequency/1000, @@ -604,20 +644,25 @@ static void print_basics(struct powernow_k8_data *data) } } if (data->batps) - printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); + printk(KERN_INFO PFX "Only %d pstates on battery\n", + data->batps); } -static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +static int fill_powernow_table(struct powernow_k8_data *data, + struct pst_s *pst, u8 maxvid) { struct cpufreq_frequency_table *powernow_table; unsigned int j; - if (data->batps) { /* use ACPI support to get full speed on mains power */ - printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); + if (data->batps) { + /* use ACPI support to get full speed on mains power */ + printk(KERN_WARNING PFX + "Only %d pstates usable (use ACPI driver for full " + "range\n", data->batps); data->numps = data->batps; } - for ( j=1; jnumps; j++ ) { + for (j = 1; j < data->numps; j++) { if (pst[j-1].fid >= pst[j].fid) { printk(KERN_ERR PFX "PST out of sequence\n"); return -EINVAL; @@ -640,9 +685,11 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, } for (j = 0; j < data->numps; j++) { + int freq; powernow_table[j].index = pst[j].fid; /* lower 8 bits */ powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ - powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); + freq = find_khz_freq_from_fid(pst[j].fid); + powernow_table[j].frequency = freq; } powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; powernow_table[data->numps].index = 0; @@ -658,7 +705,8 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, print_basics(data); for (j = 0; j < data->numps; j++) - if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) + if ((pst[j].fid == data->currfid) && + (pst[j].vid == data->currvid)) return 0; dprintk("currfid/vid do not match PST, ignoring\n"); @@ -698,7 +746,8 @@ static int find_psb_table(struct powernow_k8_data *data) } data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); + dprintk("voltage stabilization time: %d(*20us)\n", + data->vstable); dprintk("flags2: 0x%x\n", psb->flags2); data->rvo = psb->flags2 & 3; @@ -713,11 +762,12 @@ static int find_psb_table(struct powernow_k8_data *data) dprintk("numpst: 0x%x\n", psb->num_tables); cpst = psb->num_tables; - if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ + if ((psb->cpuid == 0x00000fc0) || + (psb->cpuid == 0x00000fe0)) { thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { + if ((thiscpuid == 0x00000fc0) || + (thiscpuid == 0x00000fe0)) cpst = 1; - } } if (cpst != 1) { printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); @@ -732,7 +782,8 @@ static int find_psb_table(struct powernow_k8_data *data) data->numps = psb->numps; dprintk("numpstates: 0x%x\n", data->numps); - return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); + return fill_powernow_table(data, + (struct pst_s *)(psb+1), maxvid); } /* * If you see this message, complain to BIOS manufacturer. If @@ -750,23 +801,27 @@ static int find_psb_table(struct powernow_k8_data *data) } #ifdef CONFIG_X86_POWERNOW_K8_ACPI -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, + unsigned int index) { + acpi_integer control; + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; - data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; -} + control = data->acpi_data.states[index].control; data->irt = (control + >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> + RVO_SHIFT) & RVO_MASK; data->exttype = (control + >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 + << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = + (control >> VST_SHIFT) & VST_MASK; } static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; int ret_val = -ENODEV; + acpi_integer space_id; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -779,11 +834,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + space_id = data->acpi_data.control_register.space_id; + if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", data->acpi_data.control_register.space_id, - data->acpi_data.status_register.space_id); + space_id); goto err_out; } @@ -802,7 +858,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if (ret_val) goto err_out_mem; - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].frequency = + CPUFREQ_TABLE_END; powernow_table[data->acpi_data.state_count].index = 0; data->powernow_table = powernow_table; @@ -830,13 +887,15 @@ err_out_mem: err_out: acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ + /* data->acpi_data.state_count informs us at ->exit() + * whether ACPI was used */ data->acpi_data.state_count = 0; return ret_val; } -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +static int fill_powernow_table_pstate(struct powernow_k8_data *data, + struct cpufreq_frequency_table *powernow_table) { int i; u32 hi = 0, lo = 0; @@ -848,84 +907,101 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf index = data->acpi_data.states[i].control & HW_PSTATE_MASK; if (index > data->max_hw_pstate) { - printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); - printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + printk(KERN_ERR PFX "invalid pstate %d - " + "bad value %d.\n", i, index); + printk(KERN_ERR PFX "Please report to BIOS " + "manufacturer\n"); + invalidate_entry(data, i); continue; } rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); if (!(hi & HW_PSTATE_VALID_MASK)) { dprintk("invalid pstate %d, ignoring\n", index); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(data, i); continue; } powernow_table[i].index = index; - powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; + powernow_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; } return 0; } -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, + struct cpufreq_frequency_table *powernow_table) { int i; int cntlofreq = 0; + for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; u32 vid; + u32 freq, index; + acpi_integer status, control; if (data->exttype) { - fid = data->acpi_data.states[i].status & EXT_FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + status = data->acpi_data.states[i].status; + fid = status & EXT_FID_MASK; + vid = (status >> VID_SHIFT) & EXT_VID_MASK; } else { - fid = data->acpi_data.states[i].control & FID_MASK; - vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + control = data->acpi_data.states[i].control; + fid = control & FID_MASK; + vid = (control >> VID_SHIFT) & VID_MASK; } dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - powernow_table[i].frequency = find_khz_freq_from_fid(fid); + index = fid | (vid<<8); + powernow_table[i].index = index; + + freq = find_khz_freq_from_fid(fid); + powernow_table[i].frequency = freq; /* verify frequency is OK */ - if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || - (powernow_table[i].frequency < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { + dprintk("invalid freq %u kHz, ignoring\n", freq); + invalidate_entry(data, i); continue; } - /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ + /* verify voltage is OK - + * BIOSs are using "off" to indicate invalid */ if (vid == VID_OFF) { dprintk("invalid vid %u, ignoring\n", vid); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + invalidate_entry(data, i); continue; } /* verify only 1 entry from the lo frequency table */ if (fid < HI_FID_TABLE_BOTTOM) { if (cntlofreq) { - /* if both entries are the same, ignore this one ... */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); + /* if both entries are the same, + * ignore this one ... */ + if ((freq != powernow_table[cntlofreq].frequency) || + (index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX + "Too many lo freq table " + "entries\n"); return 1; } - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + dprintk("double low frequency table entry, " + "ignoring it.\n"); + invalidate_entry(data, i); continue; } else cntlofreq = i; } - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries " + "%u kHz vs. %u kHz\n", freq, + (unsigned int) + (data->acpi_data.states[i].core_frequency + * 1000)); + invalidate_entry(data, i); continue; } } @@ -935,7 +1011,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + acpi_processor_unregister_performance(&data->acpi_data, + data->cpu); free_cpumask_var(data->acpi_data.shared_cpu_map); } @@ -954,14 +1031,25 @@ static int get_transition_latency(struct powernow_k8_data *data) } #else -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) +{ + return -ENODEV; +} +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) +{ + return; +} +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, + unsigned int index) +{ + return; +} static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ -static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) +static int transition_frequency_fidvid(struct powernow_k8_data *data, + unsigned int index) { u32 fid = 0; u32 vid = 0; @@ -989,7 +1077,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i return 0; } - if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + if ((fid < HI_FID_TABLE_BOTTOM) && + (data->currfid < HI_FID_TABLE_BOTTOM)) { printk(KERN_ERR PFX "ignoring illegal change in lo freq table-%x to 0x%x\n", data->currfid, fid); @@ -1017,7 +1106,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i } /* Take a frequency, and issue the hardware pstate transition command */ -static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) +static int transition_frequency_pstate(struct powernow_k8_data *data, + unsigned int index) { u32 pstate = 0; int res, i; @@ -1029,7 +1119,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i pstate = index & HW_PSTATE_MASK; if (pstate > data->max_hw_pstate) return 0; - freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + freqs.old = find_khz_freq_from_pstate(data->powernow_table, + data->currpstate); freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask_nr(i, *(data->available_cores)) { @@ -1048,7 +1139,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i } /* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) +static int powernowk8_target(struct cpufreq_policy *pol, + unsigned targfreq, unsigned relation) { cpumask_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); @@ -1087,14 +1179,18 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - if ((checkvid != data->currvid) || (checkfid != data->currfid)) { + if ((checkvid != data->currvid) || + (checkfid != data->currfid)) { printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); + "error - out of sync, fix 0x%x 0x%x, " + "vid 0x%x 0x%x\n", + checkfid, data->currfid, + checkvid, data->currvid); } } - if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) + if (cpufreq_frequency_table_target(pol, data->powernow_table, + targfreq, relation, &newstate)) goto err_out; mutex_lock(&fidvid_mutex); @@ -1114,7 +1210,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi mutex_unlock(&fidvid_mutex); if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, + newstate); else pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1164,10 +1261,11 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) */ if (num_online_cpus() != 1) { #ifndef CONFIG_ACPI_PROCESSOR - printk(KERN_ERR PFX "ACPI Processor support is required " - "for SMP systems but is absent. Please load the " - "ACPI Processor module before starting this " - "driver.\n"); + printk(KERN_ERR PFX + "ACPI Processor support is required for " + "SMP systems but is absent. Please load the " + "ACPI Processor module before starting this " + "driver.\n"); #else printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" " ACPI _PSS objects in a way that Linux " @@ -1228,7 +1326,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->available_cores = pol->cpus; if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, + data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); @@ -1245,7 +1344,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); + dprintk("cpu_init done, current pstate 0x%x\n", + data->currpstate); else dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1262,7 +1362,7 @@ err_out: return -ENODEV; } -static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) +static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) { struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); @@ -1279,7 +1379,7 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) return 0; } -static unsigned int powernowk8_get (unsigned int cpu) +static unsigned int powernowk8_get(unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; @@ -1315,7 +1415,7 @@ out: return khz; } -static struct freq_attr* powernow_k8_attr[] = { +static struct freq_attr *powernow_k8_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -1360,7 +1460,8 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); +MODULE_AUTHOR("Paul Devriendt and " + "Mark Langsdorf "); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 57f4fa699195b761cbea90db5e38b4bc15610c7c Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 4 Feb 2009 01:17:45 +0100 Subject: [CPUFREQ] powernow-k8: Always compile powernow-k8 driver with ACPI support powernow-k8 driver should always try to get cpufreq info from ACPI. Otherwise it will not be able to detect the transition latency correctly which results in ondemand governor taking a wrong sampling rate which will then result in sever performance loss. Let the user not shoot himself in the foot and always compile in ACPI support for powernow-k8. This also fixes a wrong message if ACPI_PROCESSOR is compiled as a module and #ifndef CONFIG_ACPI_PROCESSOR path is chosen. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/Kconfig | 19 ++----------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ---------------------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 5 +---- 3 files changed, 3 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index 65792c2cc46..52c83987547 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -87,30 +87,15 @@ config X86_POWERNOW_K7_ACPI config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" select CPU_FREQ_TABLE + depends on ACPI && ACPI_PROCESSOR help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors. To compile this driver as a module, choose M here: the module will be called powernow-k8. For details, take a look at . - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool - prompt "ACPI Support" if X86_32 - depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) - default y - help - This provides access to the K8s Processor Performance States via ACPI. - This driver is probably required for CPUFreq to work with multi-socket and - SMP systems. It is not required on at least some single-socket yet - multi-core systems, even if SMP is enabled. - - It is safe to say Y here. - config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" depends on X86_32 && PCI diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4dd7e3bdee2..acc06b03194 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -38,11 +38,9 @@ #include -#ifdef CONFIG_X86_POWERNOW_K8_ACPI #include #include #include -#endif #define PFX "powernow-k8: " #define VERSION "version 2.20.00" @@ -800,7 +798,6 @@ static int find_psb_table(struct powernow_k8_data *data) return -ENODEV; } -#ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { @@ -1030,23 +1027,6 @@ static int get_transition_latency(struct powernow_k8_data *data) return 1000 * max_latency; } -#else -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) -{ - return -ENODEV; -} -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) -{ - return; -} -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, - unsigned int index) -{ - return; -} -static int get_transition_latency(struct powernow_k8_data *data) { return 0; } -#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ - /* Take a frequency, and issue the fid/vid transition command */ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) @@ -1260,19 +1240,11 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { -#ifndef CONFIG_ACPI_PROCESSOR - printk(KERN_ERR PFX - "ACPI Processor support is required for " - "SMP systems but is absent. Please load the " - "ACPI Processor module before starting this " - "driver.\n"); -#else printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" " ACPI _PSS objects in a way that Linux " "understands. Please report this to the Linux " "ACPI maintainers and complain to your BIOS " "vendor.\n"); -#endif kfree(data); return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 8ecc75b6c7c..6c6698feade 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -45,11 +45,10 @@ struct powernow_k8_data { * frequency is in kHz */ struct cpufreq_frequency_table *powernow_table; -#ifdef CONFIG_X86_POWERNOW_K8_ACPI /* the acpi table needs to be kept. it's only available if ACPI was * used to determine valid frequency/vid/fid states */ struct acpi_processor_performance acpi_data; -#endif + /* we need to keep track of associated cores, but let cpufreq * handle hotplug events - so just point at cpufreq pol->cpus * structure */ @@ -222,10 +221,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); -#ifdef CONFIG_X86_POWERNOW_K8_ACPI static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -#endif #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -- cgit v1.2.3-70-g09d2 From 79cc56af9fdbeaa91f50289b932d0959b41f9467 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 4 Feb 2009 11:56:11 +0100 Subject: [CPUFREQ] powernow-k8: Only print error message once, not per core. This is the typical message you get if you plug in a CPU which is newer than your BIOS. It's annoying seeing this message for each core. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index acc06b03194..c44853fc827 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -794,7 +794,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -1218,6 +1218,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) struct powernow_k8_data *data; cpumask_t oldmask; int rc; + static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1240,11 +1241,19 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" - " ACPI _PSS objects in a way that Linux " - "understands. Please report this to the Linux " - "ACPI maintainers and complain to your BIOS " - "vendor.\n"); + /* + * Replace this one with print_once as soon as such a + * thing gets introduced + */ + if (!print_once) { + WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS " + "does not provide ACPI _PSS objects " + "in a way that Linux understands. " + "Please report this to the Linux ACPI" + " maintainers and complain to your " + "BIOS vendor.\n"); + print_once++; + } kfree(data); return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 3a58df35a64a1e0ac32c30ea629a513dec2fe711 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 17 Jan 2009 22:36:14 -0500 Subject: [CPUFREQ] checkpatch cleanups for acpi-cpufreq Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c..3babe1f1e91 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -1,5 +1,5 @@ /* - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) + * acpi-cpufreq.c - ACPI Processor P-States Driver * * Copyright (C) 2001, 2002 Andy Grover * Copyright (C) 2001, 2002 Paul Diefenbaugh @@ -36,16 +36,18 @@ #include #include +#include +#include +#include + #include -#include #include #include #include -#include -#include -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ + "acpi-cpufreq", msg) MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); MODULE_DESCRIPTION("ACPI Processor P-States Driver"); @@ -95,7 +97,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) perf = data->acpi_data; - for (i=0; istate_count; i++) { + for (i = 0; i < perf->state_count; i++) { if (value == perf->states[i].status) return data->freq_table[i].frequency; } @@ -110,7 +112,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) msr &= INTEL_MSR_RANGE; perf = data->acpi_data; - for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { if (msr == perf->states[data->freq_table[i].index].status) return data->freq_table[i].frequency; } @@ -138,15 +140,13 @@ struct io_addr { u8 bit_width; }; -typedef union { - struct msr_addr msr; - struct io_addr io; -} drv_addr_union; - struct drv_cmd { unsigned int type; const struct cpumask *mask; - drv_addr_union addr; + union { + struct msr_addr msr; + struct io_addr io; + } addr; u32 val; }; @@ -369,7 +369,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, unsigned int cur_freq; unsigned int i; - for (i=0; i<100; i++) { + for (i = 0; i < 100; i++) { cur_freq = extract_freq(get_cur_val(mask), data); if (cur_freq == freq) return 1; @@ -494,7 +494,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) unsigned long freq; unsigned long freqn = perf->states[0].core_frequency * 1000; - for (i=0; i<(perf->state_count-1); i++) { + for (i = 0; i < (perf->state_count-1); i++) { freq = freqn; freqn = perf->states[i+1].core_frequency * 1000; if ((2 * cpu_khz) > (freqn + freq)) { @@ -673,7 +673,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* detect transition latency */ policy->cpuinfo.transition_latency = 0; - for (i=0; istate_count; i++) { + for (i = 0; i < perf->state_count; i++) { if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) policy->cpuinfo.transition_latency = @@ -682,8 +682,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ - for (i=0; istate_count; i++) { - if (i>0 && perf->states[i].core_frequency >= + for (i = 0; i < perf->state_count; i++) { + if (i > 0 && perf->states[i].core_frequency >= data->freq_table[valid_states-1].frequency / 1000) continue; -- cgit v1.2.3-70-g09d2 From 91420220d278584693d11a800b78fdc20e8fe10e Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 4 Feb 2009 15:28:54 -0500 Subject: [CPUFREQ] Use swap() in longhaul.c Remove hand-coded implementation of swap() Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index dc03622a83a..f1c51aea064 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -511,13 +511,10 @@ static int __init longhaul_get_ranges(void) } } if (min_i != j) { - unsigned int temp; - temp = longhaul_table[j].frequency; - longhaul_table[j].frequency = longhaul_table[min_i].frequency; - longhaul_table[min_i].frequency = temp; - temp = longhaul_table[j].index; - longhaul_table[j].index = longhaul_table[min_i].index; - longhaul_table[min_i].index = temp; + swap(longhaul_table[j].frequency, + longhaul_table[min_i].frequency); + swap(longhaul_table[j].index, + longhaul_table[min_i].index); } } -- cgit v1.2.3-70-g09d2 From de3ed81d746d7cfc22a0c645244ed9fa71e4a833 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 18 Feb 2009 18:28:22 +0000 Subject: [CPUFREQ] Change link order of x86 cpufreq modules Change the link order of the cpufreq modules to ensure that they're probed in the preferred order when statically linked in. Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 560f7760dae..509296df294 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile @@ -1,6 +1,11 @@ +# Link order matters. K8 is preferred to ACPI because of firmware bugs in early +# K8 systems. ACPI is preferred to all other hardware-specific drivers. +# speedstep-* is preferred over p4-clockmod. + +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o obj-$(CONFIG_X86_LONGHAUL) += longhaul.o obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o @@ -10,7 +15,6 @@ obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o -- cgit v1.2.3-70-g09d2 From 0cb8bc256093e716d2a0a4a721f36c625a3f7634 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 18 Feb 2009 14:11:00 -0500 Subject: [CPUFREQ] powernow-k8: Use a common exit path. a0abd520fd69295f4a3735e29a9448a32e101d47 introduced a slew of extra kfree/return -ENODEV pairs. This replaces them all with gotos. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c44853fc827..a15ac94e0b9 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1254,21 +1254,18 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "BIOS vendor.\n"); print_once++; } - kfree(data); - return -ENODEV; + goto err_out; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - kfree(data); - return -ENODEV; + goto err_out; } rc = find_psb_table(data); - if (rc) { - kfree(data); - return -ENODEV; - } + if (rc) + goto err_out; + /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ pol->cpuinfo.transition_latency = ( @@ -1283,16 +1280,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; + goto err_out_unmask; } if (pending_bit_stuck()) { printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out; + goto err_out_unmask; } if (query_current_values_with_pending_wait(data)) - goto err_out; + goto err_out_unmask; if (cpu_family == CPU_OPTERON) fidvid_msr_init(); @@ -1335,10 +1332,11 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) return 0; -err_out: +err_out_unmask: set_cpus_allowed_ptr(current, &oldmask); powernow_k8_cpu_exit_acpi(data); +err_out: kfree(data); return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 199785eac892a1fa1b71cc22bec58e8b156d9311 Mon Sep 17 00:00:00 2001 From: Matthias-Christian Ott Date: Fri, 20 Feb 2009 20:52:17 -0500 Subject: [CPUFREQ] p4-clockmod reports wrong frequency. http://bugzilla.kernel.org/show_bug.cgi?id=10968 [ Updated for current tree, and fixed compile failure when p4-clockmod was built modular -- davej] From: Matthias-Christian Ott Signed-off-by: Dominik Brodowski Signed-off-by: Dave Jones --- arch/x86/include/asm/timer.h | 2 +- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 7 ++++++ arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 34 +++++++++++++++++------------ arch/x86/kernel/tsc.c | 3 --- 4 files changed, 28 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 2bb6a835c45..4f5c2472485 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -11,8 +11,8 @@ unsigned long native_calibrate_tsc(void); #ifdef CONFIG_X86_32 extern int timer_ack; +#endif extern int recalibrate_cpu_khz(void); -#endif /* CONFIG_X86_32 */ extern int no_timer_check; diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 46a2a7a5314..1778402305e 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -31,6 +31,7 @@ #include #include +#include #include "speedstep-lib.h" @@ -224,6 +225,12 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) dprintk("has errata -- disabling low frequencies\n"); } + if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D && + c->x86_model < 2) { + /* switch to maximum frequency and measure result */ + cpufreq_p4_setdc(policy->cpu, DC_DISABLE); + recalibrate_cpu_khz(); + } /* get max frequency */ stock_freq = cpufreq_p4_get_frequency(c); if (!stock_freq) diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 55c696daa05..2e3c6862657 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -16,6 +16,7 @@ #include #include +#include #include "speedstep-lib.h" #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ @@ -178,6 +179,15 @@ static unsigned int pentium4_get_frequency(void) u32 msr_lo, msr_hi, mult; unsigned int fsb = 0; unsigned int ret; + u8 fsb_code; + + /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency + * to System Bus Frequency Ratio Field in the Processor Frequency + * Configuration Register of the MSR. Therefore the current + * frequency cannot be calculated and has to be measured. + */ + if (c->x86_model < 2) + return cpu_khz; rdmsr(0x2c, msr_lo, msr_hi); @@ -188,21 +198,17 @@ static unsigned int pentium4_get_frequency(void) * revision #12 in Table B-1: MSRs in the Pentium 4 and * Intel Xeon Processors, on page B-4 and B-5. */ - if (c->x86_model < 2) + fsb_code = (msr_lo >> 16) & 0x7; + switch (fsb_code) { + case 0: fsb = 100 * 1000; - else { - u8 fsb_code = (msr_lo >> 16) & 0x7; - switch (fsb_code) { - case 0: - fsb = 100 * 1000; - break; - case 1: - fsb = 13333 * 10; - break; - case 2: - fsb = 200 * 1000; - break; - } + break; + case 1: + fsb = 13333 * 10; + break; + case 2: + fsb = 200 * 1000; + break; } if (!fsb) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e5816863..5ad22f8f5f3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -523,8 +523,6 @@ unsigned long native_calibrate_tsc(void) return tsc_pit_min; } -#ifdef CONFIG_X86_32 -/* Only called from the Powernow K7 cpu freq driver */ int recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP @@ -546,7 +544,6 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); -#endif /* CONFIG_X86_32 */ /* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) -- cgit v1.2.3-70-g09d2 From eb3092cee79e4efa5d3e9c81c7e6ca90318cebb8 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Sat, 21 Feb 2009 01:58:47 +0000 Subject: [CPUFREQ] Make cpufreq-nforce2 less obnoxious Not owning an nforce2 is a sign of good taste, not an error. Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 99262906838..733093d6043 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -424,7 +424,7 @@ static int __init nforce2_init(void) /* detect chipset */ if (nforce2_detect_chipset()) { - printk(KERN_ERR PFX "No nForce2 chipset.\n"); + printk(KERN_INFO PFX "No nForce2 chipset.\n"); return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 4ab0d47d0ab311eb181532c1ecb6d02905685071 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:12 -0800 Subject: gpu/drm, x86, PAT: io_mapping_create_wc and resource_size_t io_mapping_create_wc should take a resource_size_t parameter in place of unsigned long. With unsigned long, there will be no way to map greater than 4GB address in i386/32 bit. On x86, greater than 4GB addresses cannot be mapped on i386 without PAE. Return error for such a case. Patch also adds a structure for io_mapping, that saves the base, size and type on HAVE_ATOMIC_IOMAP archs, that can be used to verify the offset on io_mapping_map calls. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iomap.h | 3 +++ arch/x86/mm/iomap_32.c | 18 +++++++++++++++++ include/linux/io-mapping.h | 46 +++++++++++++++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index c1f06289b14..86af26091d6 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -23,6 +23,9 @@ #include #include +int +is_io_mapping_possible(resource_size_t base, unsigned long size); + void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index ca53224fc56..6c2b1af1692 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -20,6 +20,24 @@ #include #include +#ifdef CONFIG_X86_PAE +int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + return 1; +} +#else +int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + /* There is no way to map greater than 1 << 32 address without PAE */ + if (base + size > 0x100000000ULL) + return 0; + + return 1; +} +#endif + /* Map 'pfn' using fixed map 'type' and protections 'prot' */ void * diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 82df31726a5..cbc2f0cd631 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -30,11 +30,14 @@ * See Documentation/io_mapping.txt */ -/* this struct isn't actually defined anywhere */ -struct io_mapping; - #ifdef CONFIG_HAVE_ATOMIC_IOMAP +struct io_mapping { + resource_size_t base; + unsigned long size; + pgprot_t prot; +}; + /* * For small address space machines, mapping large objects * into the kernel virtual space isn't practical. Where @@ -43,23 +46,40 @@ struct io_mapping; */ static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) +io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) base; + struct io_mapping *iomap; + + if (!is_io_mapping_possible(base, size)) + return NULL; + + iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); + if (!iomap) + return NULL; + + iomap->base = base; + iomap->size = size; + iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { + kfree(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, - __pgprot(__PAGE_KERNEL_WC)); + resource_size_t phys_addr; + unsigned long pfn; + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; + pfn = (unsigned long) (phys_addr >> PAGE_SHIFT); + return iomap_atomic_prot_pfn(pfn, KM_USER0, mapping->prot); } static inline void @@ -71,8 +91,9 @@ io_mapping_unmap_atomic(void *vaddr) static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return ioremap_wc(offset, PAGE_SIZE); + BUG_ON(offset >= mapping->size); + resource_size_t phys_addr = mapping->base + offset; + return ioremap_wc(phys_addr, PAGE_SIZE); } static inline void @@ -83,9 +104,12 @@ io_mapping_unmap(void *vaddr) #else +/* this struct isn't actually defined anywhere */ +struct io_mapping; + /* Create the io_mapping object*/ static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) +io_mapping_create_wc(resource_size_t base, unsigned long size) { return (struct io_mapping *) ioremap_wc(base, size); } -- cgit v1.2.3-70-g09d2 From 55d8085671863fe4ee6a17b7814bd38180a44e1d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 25 Feb 2009 09:42:25 -0800 Subject: xen: disable interrupts early, as start_kernel expects This avoids a lockdep warning from: if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) return; in trace_hardirqs_on_caller(); Signed-off-by: Jeremy Fitzhardinge Cc: Mark McLoughlin Cc: Xen-devel Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b2..b58e9633814 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1672,6 +1672,9 @@ asmlinkage void __init xen_start_kernel(void) possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + local_irq_disable(); + early_boot_irqs_off(); + xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); -- cgit v1.2.3-70-g09d2 From f6be37fdc62d0c0214bc49815d1180ebfbd716e2 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Thu, 26 Feb 2009 12:57:56 -0500 Subject: x86: enable DMAR by default Now that the obvious bugs have been worked out, specifically the iwlagn issue, and the write buffer errata, DMAR should be safe to turn back on by default. (We've had it on since those patches were first written a few weeks ago, without any noticeable bug reports (most have been due to the dma-api debug patchset.)) Signed-off-by: Kyle McMartin Acked-by: David Woodhouse Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9c39095b33f..bc2fbadff9f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1803,7 +1803,7 @@ config DMAR remapping devices. config DMAR_DEFAULT_ON - def_bool n + def_bool y prompt "Enable DMA Remapping Devices by default" depends on DMAR help -- cgit v1.2.3-70-g09d2 From 83ce400928680a6c8123d492684b27857f5a2d95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 20:16:58 +0100 Subject: x86: set X86_FEATURE_TSC_RELIABLE If the TSC is constant and non-stop, also set it reliable. (We will turn this off in DMI quirks for multi-chassis systems) The performance number on a 16-way Nehalem system running 32 tasks that context-switch between each other is significant: sched_clock_stable=0 sched_clock_stable=1 .................... .................... 22.456925 million/sec 24.306972 million/sec [+8.2%] lmbench's "lat_ctx -s 0 2" goes from 0.63 microseconds to 0.59 microseconds - a 6.7% increase in context-switching performance. Perfstat of 1 million pipe context switches between two tasks: Performance counter stats for './pipe-test-1m': [before] [after] ............ ............ 37621.421089 36436.848378 task clock ticks (msecs) 0 0 CPU migrations (events) 2000274 2000189 context switches (events) 194 193 pagefaults (events) 8433799643 8171016416 CPU cycles (events) -3.21% 8370133368 8180999694 instructions (events) -2.31% 4158565 3895941 cache references (events) -6.74% 44312 46264 cache misses (events) 2349.287976 2279.362465 wall-time (msecs) -3.06% The speedup comes straight from the reduction in the instruction count. sched_clock_cpu() got simpler and the whole workload thus executes faster. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 24ff26a38ad..5fff00c70de 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate - * with P/T states and does not stop in deep C-states + * with P/T states and does not stop in deep C-states. + * + * It is also reliable across cores and sockets. (but not across + * cabinets - we turn it off in that case explicitly.) */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); + sched_clock_stable = 1; } } -- cgit v1.2.3-70-g09d2 From 92b9af9e4f144535c65aee673cfad309f25fa465 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2009 14:09:27 +0100 Subject: x86: i915 needs pgprot_writecombine() and is_io_mapping_possible() Impact: build fix Theodore Ts reported that the i915 driver needs these symbols: ERROR: "pgprot_writecombine" [drivers/gpu/drm/i915/i915.ko] undefined! ERROR: "is_io_mapping_possible" [drivers/gpu/drm/i915/i915.ko] undefined! Reported-by: Theodore Ts'o wrote: Signed-off-by: Ingo Molnar --- arch/x86/mm/iomap_32.c | 15 ++++----------- arch/x86/mm/pat.c | 2 ++ 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6c2b1af1692..04102d42ff4 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -20,23 +20,16 @@ #include #include -#ifdef CONFIG_X86_PAE -int -is_io_mapping_possible(resource_size_t base, unsigned long size) -{ - return 1; -} -#else -int -is_io_mapping_possible(resource_size_t base, unsigned long size) +int is_io_mapping_possible(resource_size_t base, unsigned long size) { +#ifndef CONFIG_X86_PAE /* There is no way to map greater than 1 << 32 address without PAE */ if (base + size > 0x100000000ULL) return 0; - +#endif return 1; } -#endif +EXPORT_SYMBOL_GPL(is_io_mapping_possible); /* Map 'pfn' using fixed map 'type' and protections 'prot' */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index aebbf67a79d..e0ab173b697 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -868,6 +869,7 @@ pgprot_t pgprot_writecombine(pgprot_t prot) else return pgprot_noncached(prot); } +EXPORT_SYMBOL_GPL(pgprot_writecombine); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) -- cgit v1.2.3-70-g09d2 From fab852aaf761a00cfe16330429b7cac15cceaeb9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:09:14 +0200 Subject: x86: count errors in testmmiotrace.ko Check the read values against the written values in the MMIO read/write test. This test shows if the given MMIO test area really works as memory, which is a prerequisite for a successful mmiotrace test. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/testmmiotrace.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index ab50a8d7402..4b29f3b0ee0 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,5 +1,5 @@ /* - * Written by Pekka Paalanen, 2008 + * Written by Pekka Paalanen, 2008-2009 */ #include #include @@ -11,28 +11,51 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); +static unsigned v16(unsigned i) +{ + return i * 12 + 7; +} + +static unsigned v32(unsigned i) +{ + return i * 212371 + 13; +} + static void do_write_test(void __iomem *p) { unsigned int i; mmiotrace_printk("Write test.\n"); + for (i = 0; i < 256; i++) iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(i * 12 + 7, p + i); + iowrite16(v16(i), p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(i * 212371 + 13, p + i); + iowrite32(v32(i), p + i); } static void do_read_test(void __iomem *p) { unsigned int i; + unsigned errs[3] = { 0 }; mmiotrace_printk("Read test.\n"); + for (i = 0; i < 256; i++) - ioread8(p + i); + if (ioread8(p + i) != i) + ++errs[0]; + for (i = 1024; i < (5 * 1024); i += 2) - ioread16(p + i); + if (ioread16(p + i) != v16(i)) + ++errs[1]; + for (i = (5 * 1024); i < (16 * 1024); i += 4) - ioread32(p + i); + if (ioread32(p + i) != v32(i)) + ++errs[2]; + + mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", + errs[0], errs[1], errs[2]); } static void do_test(void) -- cgit v1.2.3-70-g09d2 From 5ff93697fcfe1e2b9e61db82961d8f50d1ad5d57 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:10:08 +0200 Subject: x86: add far read test to testmmiotrace Apparently pages far into an ioremapped region might not actually be mapped during ioremap(). Add an optional read test to try to trigger a multiply faulting MMIO access. Also add more messages to the kernel log to help debugging. This patch is based on a patch suggested by Stuart Bennett who discovered bugs in mmiotrace related to normal kernel space faults. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/testmmiotrace.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 4b29f3b0ee0..427fd1b56df 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -9,7 +9,13 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); +MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " + "(or 8 MB if read_far is non-zero)."); + +static unsigned long read_far = 0x400100; +module_param(read_far, ulong, 0); +MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " + "(default: 0x400100)."); static unsigned v16(unsigned i) { @@ -24,6 +30,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; + pr_info(MODULE_NAME ": write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -40,6 +47,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; + pr_info(MODULE_NAME ": read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -58,9 +66,17 @@ static void do_read_test(void __iomem *p) errs[0], errs[1], errs[2]); } -static void do_test(void) +static void do_read_far_test(void __iomem *p) +{ + pr_info(MODULE_NAME ": read far test.\n"); + mmiotrace_printk("Read far test.\n"); + + ioread32(p + read_far); +} + +static void do_test(unsigned long size) { - void __iomem *p = ioremap_nocache(mmio_address, 0x4000); + void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; @@ -68,11 +84,15 @@ static void do_test(void) mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); + if (read_far && read_far < size - 4) + do_read_far_test(p); iounmap(p); } static int __init init(void) { + unsigned long size = (read_far) ? (8 << 20) : (16 << 10); + if (mmio_address == 0) { pr_err(MODULE_NAME ": you have to use the module argument " "mmio_address.\n"); @@ -81,10 +101,11 @@ static int __init init(void) return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " - "in PCI address space, and writing " - "rubbish in there.\n", mmio_address); - do_test(); + pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " + "address space, and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); + do_test(size); + pr_info(MODULE_NAME ": All done.\n"); return 0; } -- cgit v1.2.3-70-g09d2 From e9d54cae8f03e7f963a12f44bd50d68f49b9ea36 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Fri, 30 Jan 2009 17:38:59 +0000 Subject: x86 mmiotrace: WARN_ONCE if dis/arming a page fails Print a full warning once, if arming or disarming a page fails. Also, if initial arming fails, do not handle the page further. This avoids the possibility of a page failing to arm and then later claiming to have handled any fault on that page. WARN_ONCE added by Pekka Paalanen. Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93d82038af4..fb1f11546fc 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -105,7 +105,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_page_present(unsigned long addr, bool present, +static int set_page_present(unsigned long addr, bool present, unsigned int *pglevel) { pteval_t pteval; @@ -116,7 +116,7 @@ static void set_page_present(unsigned long addr, bool present, if (!pte) { pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return; + return -1; } if (pglevel) @@ -140,22 +140,27 @@ static void set_page_present(unsigned long addr, bool present, default: pr_err("kmmio: unexpected page level 0x%x.\n", level); - return; + return -1; } __flush_tlb_one(addr); + + return 0; } /** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, false, pglevel); + int ret = set_page_present(page & PAGE_MASK, false, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + return ret; } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, true, pglevel); + int ret = set_page_present(page & PAGE_MASK, true, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); } /* @@ -326,9 +331,13 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add_rcu(&f->list, kmmio_page_list(f->page)); - arm_kmmio_fault_page(f->page, NULL); + if (arm_kmmio_fault_page(f->page, NULL)) { + kfree(f); + return -1; + } + + list_add_rcu(&f->list, kmmio_page_list(f->page)); return 0; } -- cgit v1.2.3-70-g09d2 From 5359b585fb5edb3db34d6cd491e1475b098c61d3 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:11:58 +0200 Subject: x86 mmiotrace: fix save/restore page table state From baa99e2b32449ec7bf147c234adfa444caecac8a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 22 Feb 2009 20:02:43 +0200 Blindly setting _PAGE_PRESENT in disarm_kmmio_fault_page() overlooks the possibility, that the page was not present when it was armed. Make arm_kmmio_fault_page() store the previous page presence in struct kmmio_fault_page and use it on disarm. This patch was originally written by Stuart Bennett, but Pekka Paalanen rewrote it a little different. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 66 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index fb1f11546fc..be361eb828c 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -32,6 +32,8 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ + bool old_presence; /* page presence prior to arming */ + bool armed; /* * Number of times this page has been registered as a part @@ -105,8 +107,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static int set_page_present(unsigned long addr, bool present, - unsigned int *pglevel) +static int set_page_presence(unsigned long addr, bool present, bool *old) { pteval_t pteval; pmdval_t pmdval; @@ -119,20 +120,21 @@ static int set_page_present(unsigned long addr, bool present, return -1; } - if (pglevel) - *pglevel = level; - switch (level) { case PG_LEVEL_2M: pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + pmdval = pmd_val(*pmd); + *old = !!(pmdval & _PAGE_PRESENT); + pmdval &= ~_PAGE_PRESENT; if (present) pmdval |= _PAGE_PRESENT; set_pmd(pmd, __pmd(pmdval)); break; case PG_LEVEL_4K: - pteval = pte_val(*pte) & ~_PAGE_PRESENT; + pteval = pte_val(*pte); + *old = !!(pteval & _PAGE_PRESENT); + pteval &= ~_PAGE_PRESENT; if (present) pteval |= _PAGE_PRESENT; set_pte_atomic(pte, __pte(pteval)); @@ -148,19 +150,39 @@ static int set_page_present(unsigned long addr, bool present, return 0; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/* + * Mark the given page as not present. Access to it will trigger a fault. + * + * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the + * protection is ignored here. RCU read lock is assumed held, so the struct + * will not disappear unexpectedly. Furthermore, the caller must guarantee, + * that double arming the same virtual address (page) cannot occur. + * + * Double disarming on the other hand is allowed, and may occur when a fault + * and mmiotrace shutdown happen simultaneously. + */ +static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, false, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + int ret; + WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + if (f->armed) { + pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, f->old_presence); + } + ret = set_page_presence(f->page, false, &f->old_presence); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + f->armed = true; return ret; } -/** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/** Restore the given page to saved presence state. */ +static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, true, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); + bool tmp; + int ret = set_page_presence(f->page, f->old_presence, &tmp); + WARN_ONCE(ret < 0, + KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + f->armed = false; } /* @@ -207,7 +229,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage->page, NULL); + disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* * On SMP we sometimes get recursive probe hits on the @@ -249,7 +271,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage->page, NULL); + disarm_kmmio_fault_page(ctx->fpage); /* * If another cpu accesses the same page while we are stepping, @@ -288,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; @@ -320,19 +342,19 @@ static int add_kmmio_fault_page(unsigned long page) f = get_kmmio_fault_page(page); if (f) { if (!f->count) - arm_kmmio_fault_page(f->page, NULL); + arm_kmmio_fault_page(f); f->count++; return 0; } - f = kmalloc(sizeof(*f), GFP_ATOMIC); + f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) return -1; f->count = 1; f->page = page; - if (arm_kmmio_fault_page(f->page, NULL)) { + if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } @@ -356,7 +378,7 @@ static void release_kmmio_fault_page(unsigned long page, f->count--; BUG_ON(f->count < 0); if (!f->count) { - disarm_kmmio_fault_page(f->page, NULL); + disarm_kmmio_fault_page(f); f->release_next = *release_list; *release_list = f; } -- cgit v1.2.3-70-g09d2 From 0b700a6a253b6a3b3059bb9a9247a73490ee33fb Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 1 Mar 2009 16:12:48 +0200 Subject: x86 mmiotrace: split set_page_presence() From 36772dcb6ffbbb68254cbfc379a103acd2fbfefc Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sat, 28 Feb 2009 21:34:59 +0200 Split set_page_presence() in kmmio.c into two more functions set_pmd_presence() and set_pte_presence(). Purely code reorganization, no functional changes. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index be361eb828c..d29777520af 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -107,12 +107,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } +static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) +{ + pmdval_t v = pmd_val(*pmd); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(v)); +} + +static void set_pte_presence(pte_t *pte, bool present, bool *old) +{ + pteval_t v = pte_val(*pte); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(v)); +} + static int set_page_presence(unsigned long addr, bool present, bool *old) { - pteval_t pteval; - pmdval_t pmdval; unsigned int level; - pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); if (!pte) { @@ -122,31 +139,17 @@ static int set_page_presence(unsigned long addr, bool present, bool *old) switch (level) { case PG_LEVEL_2M: - pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd); - *old = !!(pmdval & _PAGE_PRESENT); - pmdval &= ~_PAGE_PRESENT; - if (present) - pmdval |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(pmdval)); + set_pmd_presence((pmd_t *)pte, present, old); break; - case PG_LEVEL_4K: - pteval = pte_val(*pte); - *old = !!(pteval & _PAGE_PRESENT); - pteval &= ~_PAGE_PRESENT; - if (present) - pteval |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(pteval)); + set_pte_presence(pte, present, old); break; - default: pr_err("kmmio: unexpected page level 0x%x.\n", level); return -1; } __flush_tlb_one(addr); - return 0; } -- cgit v1.2.3-70-g09d2 From 3e39aa156a24ce386da378784edd0f748c770087 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Thu, 5 Feb 2009 11:02:02 +0000 Subject: x86 mmiotrace: improve handling of secondary faults Upgrade some kmmio.c debug messages to warnings. Allow secondary faults on probed pages to fall through, and only log secondary faults that are not due to non-present pages. Patch edited by Pekka Paalanen. Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index d29777520af..4c66bd3a240 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -232,28 +232,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* - * On SMP we sometimes get recursive probe hits on the - * same address. Context is already saved, fall out. + * A second fault on the same page means some other + * condition needs handling by do_page_fault(), the + * page really not being present is the most common. */ - pr_debug("kmmio: duplicate probe hit on CPU %d, for " - "address 0x%08lx.\n", - smp_processor_id(), addr); - ret = 1; - goto no_kmmio_ctx; - } - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at least - * prevents a panic. - */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " + pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); + + if (!faultpage->old_presence) + pr_info("kmmio: unexpected secondary hit for " + "address 0x%08lx on CPU %d.\n", addr, + smp_processor_id()); + } else { + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at + * least prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); + disarm_kmmio_fault_page(faultpage); + } goto no_kmmio_ctx; } ctx->active++; @@ -305,7 +309,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + pr_warning("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } -- cgit v1.2.3-70-g09d2 From 340430c572f7b2b275d39965e88bafa71693cb23 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 24 Feb 2009 21:44:15 +0200 Subject: x86 mmiotrace: fix race with release_kmmio_fault_page() There was a theoretical possibility to a race between arming a page in post_kmmio_handler() and disarming the page in release_kmmio_fault_page(): cpu0 cpu1 ------------------------------------------------------------------ mmiotrace shutdown enter release_kmmio_fault_page fault on the page disarm the page disarm the page handle the MMIO access re-arm the page put the page on release list remove_kmmio_fault_pages() fault on the page page not known to mmiotrace fall back to do_page_fault() *KABOOM* (This scenario also shows the double disarm case which is allowed.) Fixed by acquiring kmmio_lock in post_kmmio_handler() and checking if the page is being released from mmiotrace. Signed-off-by: Pekka Paalanen Cc: Stuart Bennett Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4c66bd3a240..9f205030d9a 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -38,7 +38,8 @@ struct kmmio_fault_page { /* * Number of times this page has been registered as a part * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU). + * Used only by writers (RCU) and post_kmmio_handler(). + * Protected by kmmio_lock, when linked into kmmio_page_table. */ int count; }; @@ -317,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage); + /* Prevent racing against release_kmmio_fault_page(). */ + spin_lock(&kmmio_lock); + if (ctx->fpage->count) + arm_kmmio_fault_page(ctx->fpage); + spin_unlock(&kmmio_lock); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; -- cgit v1.2.3-70-g09d2 From ccbe495caa5e604b04d5a31d7459a6f6a76a756c Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 27 Feb 2009 19:03:24 -0800 Subject: x86-64: syscall-audit: fix 32/64 syscall hole On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with ljmp, and then use the "syscall" instruction to make a 64-bit system call. A 64-bit process make a 32-bit system call with int $0x80. In both these cases, audit_syscall_entry() will use the wrong system call number table and the wrong system call argument registers. This could be used to circumvent a syscall audit configuration that filters based on the syscall numbers or argument details. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 5a4c23d8989..06ca07f6ad8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1388,7 +1388,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, #ifdef CONFIG_X86_32 # define IS_IA32 1 #elif defined CONFIG_IA32_EMULATION -# define IS_IA32 test_thread_flag(TIF_IA32) +# define IS_IA32 is_compat_task() #else # define IS_IA32 0 #endif -- cgit v1.2.3-70-g09d2 From 5b1017404aea6d2e552e991b3fd814d839e9cd67 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 27 Feb 2009 23:25:54 -0800 Subject: x86-64: seccomp: fix 32/64 syscall hole On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with ljmp, and then use the "syscall" instruction to make a 64-bit system call. A 64-bit process make a 32-bit system call with int $0x80. In both these cases under CONFIG_SECCOMP=y, secure_computing() will use the wrong system call number table. The fix is simple: test TS_COMPAT instead of TIF_IA32. Here is an example exploit: /* test case for seccomp circumvention on x86-64 There are two failure modes: compile with -m64 or compile with -m32. The -m64 case is the worst one, because it does "chmod 777 ." (could be any chmod call). The -m32 case demonstrates it was able to do stat(), which can glean information but not harm anything directly. A buggy kernel will let the test do something, print, and exit 1; a fixed kernel will make it exit with SIGKILL before it does anything. */ #define _GNU_SOURCE #include #include #include #include #include #include #include int main (int argc, char **argv) { char buf[100]; static const char dot[] = "."; long ret; unsigned st[24]; if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0) perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?"); #ifdef __x86_64__ assert ((uintptr_t) dot < (1UL << 32)); asm ("int $0x80 # %0 <- %1(%2 %3)" : "=a" (ret) : "0" (15), "b" (dot), "c" (0777)); ret = snprintf (buf, sizeof buf, "result %ld (check mode on .!)\n", ret); #elif defined __i386__ asm (".code32\n" "pushl %%cs\n" "pushl $2f\n" "ljmpl $0x33, $1f\n" ".code64\n" "1: syscall # %0 <- %1(%2 %3)\n" "lretl\n" ".code32\n" "2:" : "=a" (ret) : "0" (4), "D" (dot), "S" (&st)); if (ret == 0) ret = snprintf (buf, sizeof buf, "stat . -> st_uid=%u\n", st[7]); else ret = snprintf (buf, sizeof buf, "result %ld\n", ret); #else # error "not this one" #endif write (1, buf, ret); syscall (__NR_exit, 1); return 2; } Signed-off-by: Roland McGrath [ I don't know if anybody actually uses seccomp, but it's enabled in at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ] Signed-off-by: Linus Torvalds --- arch/mips/include/asm/seccomp.h | 1 - arch/powerpc/include/asm/compat.h | 5 +++++ arch/powerpc/include/asm/seccomp.h | 4 ---- arch/sparc/include/asm/compat.h | 5 +++++ arch/sparc/include/asm/seccomp.h | 6 ------ arch/x86/include/asm/seccomp_32.h | 6 ------ arch/x86/include/asm/seccomp_64.h | 8 -------- kernel/seccomp.c | 7 ++++--- 8 files changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/mips/include/asm/seccomp.h b/arch/mips/include/asm/seccomp.h index 36ed4407025..a6772e9507f 100644 --- a/arch/mips/include/asm/seccomp.h +++ b/arch/mips/include/asm/seccomp.h @@ -1,6 +1,5 @@ #ifndef __ASM_SECCOMP_H -#include #include #define __NR_seccomp_read __NR_read diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index d811a8cd7b5..4774c2f9223 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -210,5 +210,10 @@ struct compat_shmid64_ds { compat_ulong_t __unused6; }; +static inline int is_compat_task(void) +{ + return test_thread_flag(TIF_32BIT); +} + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_COMPAT_H */ diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h index 853765eb1f6..00c1d9133cf 100644 --- a/arch/powerpc/include/asm/seccomp.h +++ b/arch/powerpc/include/asm/seccomp.h @@ -1,10 +1,6 @@ #ifndef _ASM_POWERPC_SECCOMP_H #define _ASM_POWERPC_SECCOMP_H -#ifdef __KERNEL__ -#include -#endif - #include #define __NR_seccomp_read __NR_read diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index f260b58f5ce..0e706257918 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -240,4 +240,9 @@ struct compat_shmid64_ds { unsigned int __unused2; }; +static inline int is_compat_task(void) +{ + return test_thread_flag(TIF_32BIT); +} + #endif /* _ASM_SPARC64_COMPAT_H */ diff --git a/arch/sparc/include/asm/seccomp.h b/arch/sparc/include/asm/seccomp.h index 7fcd9968192..adca1bce41d 100644 --- a/arch/sparc/include/asm/seccomp.h +++ b/arch/sparc/include/asm/seccomp.h @@ -1,11 +1,5 @@ #ifndef _ASM_SECCOMP_H -#include /* already defines TIF_32BIT */ - -#ifndef TIF_32BIT -#error "unexpected TIF_32BIT on sparc64" -#endif - #include #define __NR_seccomp_read __NR_read diff --git a/arch/x86/include/asm/seccomp_32.h b/arch/x86/include/asm/seccomp_32.h index a6ad87b352c..b811d6f5780 100644 --- a/arch/x86/include/asm/seccomp_32.h +++ b/arch/x86/include/asm/seccomp_32.h @@ -1,12 +1,6 @@ #ifndef _ASM_X86_SECCOMP_32_H #define _ASM_X86_SECCOMP_32_H -#include - -#ifdef TIF_32BIT -#error "unexpected TIF_32BIT on i386" -#endif - #include #define __NR_seccomp_read __NR_read diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h index 4171bb794e9..84ec1bd161a 100644 --- a/arch/x86/include/asm/seccomp_64.h +++ b/arch/x86/include/asm/seccomp_64.h @@ -1,14 +1,6 @@ #ifndef _ASM_X86_SECCOMP_64_H #define _ASM_X86_SECCOMP_64_H -#include - -#ifdef TIF_32BIT -#error "unexpected TIF_32BIT on x86_64" -#else -#define TIF_32BIT TIF_IA32 -#endif - #include #include diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ad64fcb731f..57d4b13b631 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -8,6 +8,7 @@ #include #include +#include /* #define SECCOMP_DEBUG 1 */ #define NR_SECCOMP_MODES 1 @@ -22,7 +23,7 @@ static int mode1_syscalls[] = { 0, /* null terminated */ }; -#ifdef TIF_32BIT +#ifdef CONFIG_COMPAT static int mode1_syscalls_32[] = { __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 0, /* null terminated */ @@ -37,8 +38,8 @@ void __secure_computing(int this_syscall) switch (mode) { case 1: syscall = mode1_syscalls; -#ifdef TIF_32BIT - if (test_thread_flag(TIF_32BIT)) +#ifdef CONFIG_COMPAT + if (is_compat_task()) syscall = mode1_syscalls_32; #endif do { -- cgit v1.2.3-70-g09d2 From 0fc59d3a01820765e5f3a723733728758b0cf577 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 2 Mar 2009 23:36:13 -0800 Subject: x86: fix init_memory_mapping() to handle small ranges Impact: fix failed EFI bootup in certain circumstances Ying Huang found init_memory_mapping() has problem with small ranges less than 2M when he tried to direct map the EFI runtime code out of max_low_pfn_mapped. It turns out we never considered that case and didn't check the range... Reported-by: Ying Huang Signed-off-by: Yinghai Lu Cc: Brian Maly LKML-Reference: <49ACDDED.1060508@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e6d36b49025..b1352250096 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -714,6 +714,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, pos = start_pfn << PAGE_SHIFT; end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (end_pfn > (end >> PAGE_SHIFT)) + end_pfn = end >> PAGE_SHIFT; if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); pos = end_pfn << PAGE_SHIFT; -- cgit v1.2.3-70-g09d2 From 780eef9492b16a1543a3b2ae9f9526a735fc9856 Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Thu, 19 Feb 2009 17:34:03 +0100 Subject: x86: oprofile: don't set counter width from cpuid on Core2 Impact: fix stuck NMIs and non-working oprofile on certain CPUs Resetting the counter width of the performance counters on Intel's Core2 CPUs, breaks the delivery of NMIs, when running in x86_64 mode. This should fix bug #12395: http://bugzilla.kernel.org/show_bug.cgi?id=12395 Signed-off-by: Tim Blechmann Signed-off-by: Robert Richter LKML-Reference: <20090303100412.GC10085@erda.amd.com> Cc: Signed-off-by: Ingo Molnar --- arch/x86/oprofile/op_model_ppro.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index e9f80c744cf..10131fbdaad 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -78,8 +78,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) if (cpu_has_arch_perfmon) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); - if (counter_width < eax.split.bit_width) - counter_width = eax.split.bit_width; + + /* + * For Core2 (family 6, model 15), don't reset the + * counter width: + */ + if (!(eax.split.version_id == 0 && + current_cpu_data.x86 == 6 && + current_cpu_data.x86_model == 15)) { + + if (counter_width < eax.split.bit_width) + counter_width = eax.split.bit_width; + } } /* clear all counters */ -- cgit v1.2.3-70-g09d2 From ff0c0874905fb312ca1491bbdac2653b0b48c20b Mon Sep 17 00:00:00 2001 From: Brian Maly Date: Tue, 3 Mar 2009 21:55:31 -0500 Subject: x86: fix DMI on EFI Impact: reactivate DMI quirks on EFI hardware DMI tables are loaded by EFI, so the dmi calls must happen after efi_init() and not before. Currently Apple hardware uses DMI to determine the framebuffer mappings for efifb. Without DMI working you also have no video on MacBook Pro. This patch resolves the DMI issue for EFI hardware (DMI is now properly detected at boot), and additionally efifb now loads on Apple hardware (i.e. video works). Signed-off-by: Brian Maly Acked-by: Yinghai Lu Cc: ying.huang@intel.com LKML-Reference: <49ADEDA3.1030406@redhat.com> Signed-off-by: Ingo Molnar arch/x86/kernel/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) --- arch/x86/kernel/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c461f6d6907..6a8811a6932 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -770,6 +770,9 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); + if (efi_enabled) + efi_init(); + dmi_scan_machine(); dmi_check_system(bad_bios_dmi_table); @@ -789,8 +792,6 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - if (efi_enabled) - efi_init(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { -- cgit v1.2.3-70-g09d2 From dd39ecf522ba86c70809715af46e6557f6491131 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 4 Mar 2009 10:58:33 +0800 Subject: x86: EFI: Back efi_ioremap with init_memory_mapping instead of FIX_MAP Impact: Fix boot failure on EFI system with large runtime memory range Brian Maly reported that some EFI system with large runtime memory range can not boot. Because the FIX_MAP used to map runtime memory range is smaller than run time memory range. This patch fixes this issue by re-implement efi_ioremap() with init_memory_mapping(). Reported-and-tested-by: Brian Maly Signed-off-by: Huang Ying Cc: Brian Maly Cc: Yinghai Lu LKML-Reference: <1236135513.6204.306.camel@yhuang-dev.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 -- arch/x86/include/asm/fixmap_64.h | 4 ---- arch/x86/kernel/efi.c | 7 +++++-- arch/x86/kernel/efi_64.c | 21 ++++----------------- 4 files changed, 9 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ca5ffb2856b..edc90f23e70 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -37,8 +37,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #else /* !CONFIG_X86_32 */ -#define MAX_EFI_IO_PAGES 100 - extern u64 efi_call0(void *fp); extern u64 efi_call1(void *fp, u64 arg1); extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 00a30ab9b1a..8be740977db 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -16,7 +16,6 @@ #include #include #include -#include /* * Here we define all the compile-time 'special' virtual @@ -43,9 +42,6 @@ enum fixed_addresses { FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, - FIX_EFI_IO_MAP_LAST_PAGE, - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE - + MAX_EFI_IO_PAGES - 1, #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe1..eb1ef3b67dd 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -467,7 +467,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages; + u64 end, systab, addr, npages, end_pfn; void *p, *va; efi.systab = NULL; @@ -479,7 +479,10 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if (PFN_UP(end) <= max_low_pfn_mapped) + end_pfn = PFN_UP(end); + if (end_pfn <= max_low_pfn_mapped + || (end_pfn > (1UL << (32 - PAGE_SHIFT)) + && end_pfn <= max_pfn_mapped)) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215..cb783b92c50 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -99,24 +99,11 @@ void __init efi_call_phys_epilog(void) void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped __initdata; - unsigned i, pages; - unsigned long offset; + unsigned long last_map_pfn; - pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - - if (pages_mapped + pages > MAX_EFI_IO_PAGES) + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) return NULL; - for (i = 0; i < pages; i++) { - __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, - phys_addr, PAGE_KERNEL); - phys_addr += PAGE_SIZE; - pages_mapped++; - } - - return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)) + offset; + return (void __iomem *)__va(phys_addr); } -- cgit v1.2.3-70-g09d2 From ab9e18587f4cdb5f3fb3854c732f27a36f98e8f6 Mon Sep 17 00:00:00 2001 From: Daniel Glöckner Date: Wed, 4 Mar 2009 19:42:27 +0100 Subject: x86, math-emu: fix init_fpu for task != current MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: fix math-emu related crash while using GDB/ptrace init_fpu() calls finit to initialize a task's xstate, while finit always works on the current task. If we use PTRACE_GETFPREGS on another process and both processes did not already use floating point, we get a null pointer exception in finit. This patch creates a new function finit_task that takes a task_struct parameter. finit becomes a wrapper that simply calls finit_task with current. On the plus side this avoids many calls to get_current which would each resolve to an inline assembler mov instruction. An empty finit_task has been added to i387.h to avoid linker errors in case the compiler still emits the call in init_fpu when CONFIG_MATH_EMULATION is not defined. The declaration of finit in i387.h has been removed as the remaining code using this function gets its prototype from fpu_proto.h. Signed-off-by: Daniel Glöckner Cc: Suresh Siddha Cc: "Pallipadi Venkatesh" Cc: Arjan van de Ven Cc: Bill Metzenthen LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 8 +++++++- arch/x86/kernel/i387.c | 2 +- arch/x86/math-emu/fpu_aux.c | 31 ++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 48f0004db8c..71c9e518398 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -172,7 +172,13 @@ static inline void __save_init_fpu(struct task_struct *tsk) #else /* CONFIG_X86_32 */ -extern void finit(void); +#ifdef CONFIG_MATH_EMULATION +extern void finit_task(struct task_struct *tsk); +#else +static inline void finit_task(struct task_struct *tsk) +{ +} +#endif static inline void tolerant_fwait(void) { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b0f61f0dcd0..f2f8540a7f3 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk) #ifdef CONFIG_X86_32 if (!HAVE_HWFP) { memset(tsk->thread.xstate, 0, xstate_size); - finit(); + finit_task(tsk); set_stopped_child_used_math(tsk); return 0; } diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index 491e737ce54..aa098708877 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -30,20 +30,29 @@ static void fclex(void) } /* Needs to be externally visible */ -void finit(void) +void finit_task(struct task_struct *tsk) { - control_word = 0x037f; - partial_status = 0; - top = 0; /* We don't keep top in the status word internally. */ - fpu_tag_word = 0xffff; + struct i387_soft_struct *soft = &tsk->thread.xstate->soft; + struct address *oaddr, *iaddr; + soft->cwd = 0x037f; + soft->swd = 0; + soft->ftop = 0; /* We don't keep top in the status word internally. */ + soft->twd = 0xffff; /* The behaviour is different from that detailed in Section 15.1.6 of the Intel manual */ - operand_address.offset = 0; - operand_address.selector = 0; - instruction_address.offset = 0; - instruction_address.selector = 0; - instruction_address.opcode = 0; - no_ip_update = 1; + oaddr = (struct address *)&soft->foo; + oaddr->offset = 0; + oaddr->selector = 0; + iaddr = (struct address *)&soft->fip; + iaddr->offset = 0; + iaddr->selector = 0; + iaddr->opcode = 0; + soft->no_update = 1; +} + +void finit(void) +{ + finit_task(current); } /* -- cgit v1.2.3-70-g09d2 From dd4124a8a06bca89c077a16437edac010f0bb993 Mon Sep 17 00:00:00 2001 From: Leann Ogasawara Date: Wed, 4 Mar 2009 11:53:00 -0800 Subject: x86: add Dell XPS710 reboot quirk Dell XPS710 will hang on reboot. This is resolved by adding a quirk to set bios reboot. Signed-off-by: Leann Ogasawara Signed-off-by: Tim Gardner Cc: "manoj.iyer" Cc: LKML-Reference: <1236196380.3231.89.camel@emiko> Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643..4526b3a75ed 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -217,6 +217,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, + { /* Handle problems with rebooting on Dell XPS710 */ + .callback = set_bios_reboot, + .ident = "Dell XPS710", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), + }, + }, { } }; -- cgit v1.2.3-70-g09d2 From 36e8abf3edcd2d207193ec5741d1a2a645d470a5 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 5 Mar 2009 00:16:26 -0500 Subject: [CPUFREQ] Prevent p4-clockmod from auto-binding to the ondemand governor. The latency of p4-clockmod sucks so hard that scaling on a regular basis with ondemand is a really bad idea. Signed-off-by: Matthew Garrett Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 1778402305e..352cf9a4916 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -246,7 +246,10 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = 1000000; /* assumed */ + + /* the transition latency is set to be 1 higher than the maximum + * transition latency of the ondemand governor */ + policy->cpuinfo.transition_latency = 10000001; policy->cur = stock_freq; return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); -- cgit v1.2.3-70-g09d2 From 9ca0791dcaa666e8e8f4b4ca028b65b4bde9cb28 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Thu, 5 Mar 2009 08:49:54 +0100 Subject: x86, bts: remove bad warning In case a ptraced task is reaped (while the tracer is still attached), ds_exit_thread() is called before ptrace_exit(). The latter will release the bts_tracer and remove the thread's ds_ctx. The former will WARN() if the context is not NULL. Oleg Nesterov submitted patches that move ptrace_exit() before exit_thread() and thus reverse the order of the above calls. Remove the bad warning. I will add it again when Oleg's changes are in. Signed-off-by: Markus Metzger LKML-Reference: <20090305084954.A22000@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 169a120587b..de7cdbda1c3 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -1029,5 +1029,4 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) void ds_exit_thread(struct task_struct *tsk) { - WARN_ON(tsk->thread.ds_ctx); } -- cgit v1.2.3-70-g09d2 From 73bf1b62f561fc8ecb00e2810efe4fe769f4933e Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Thu, 5 Mar 2009 08:57:21 +0100 Subject: x86, pebs: correct qualifier passed to ds_write_config() from ds_request_pebs() ds_write_config() can write the BTS as well as the PEBS part of the DS config. ds_request_pebs() passes the wrong qualifier, which results in the wrong configuration to be written. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger LKML-Reference: <20090305085721.A22550@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index de7cdbda1c3..87b67e3a765 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -729,7 +729,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, spin_unlock_irqrestore(&ds_lock, irq); - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); ds_resume_pebs(tracer); return tracer; -- cgit v1.2.3-70-g09d2 From d0fc63f7bd07cb779a06dc1cdd0c5a14e7f5d562 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Sun, 8 Mar 2009 20:21:35 +0200 Subject: x86 mmiotrace: fix remove_kmmio_fault_pages() Impact: fix race+crash in mmiotrace The list manipulation in remove_kmmio_fault_pages() was broken. If more than one consecutive kmmio_fault_page was re-added during the grace period between unregister_kmmio_probe() and remove_kmmio_fault_pages(), the list manipulation failed to remove pages from the release list. After a second grace period the pages get into rcu_free_kmmio_fault_pages() and raise a BUG_ON() kernel crash. The list manipulation is fixed to properly remove pages from the release list. This bug has been present from the very beginning of mmiotrace in the mainline kernel. It was introduced in 0fd0e3da ("x86: mmiotrace full patch, preview 1"); An urgent fix for Linus. Tested by Stuart (on 32-bit) and Pekka (on amd and intel 64-bit systems, nouveau and nvidia proprietary). Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen LKML-Reference: <20090308202135.34933feb@daedalus.pq.iki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9f205030d9a..6a518dd08a3 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -451,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); + struct kmmio_delayed_release *dr = + container_of(head, struct kmmio_delayed_release, rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) + if (!p->count) { list_del_rcu(&p->list); - else + prevp = &p->release_next; + } else { *prevp = p->release_next; - prevp = &p->release_next; + } p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } -- cgit v1.2.3-70-g09d2 From cbd88c8e6f5cdb8d4b9af01df825305200240382 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 9 Mar 2009 10:06:22 -0600 Subject: lguest: fix crash 'unhandled trap 13 at ' Impact: fix lguest boot crash on modern Intel machines The code in early_init_intel does: if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { u64 misc_enable; rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); And that rdmsr faults (not allowed from non-0 PL). We can get around this by mugging the family ID part of the cpuid. 5 seems like a good number. Of course, this is a hack (how very lguest!). We could just indicate that we don't support MSRs, or implement lguest_rdmst. Reported-by: Patrick McHardy Signed-off-by: Rusty Russell Tested-by: Patrick McHardy --- arch/x86/lguest/boot.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92f1c6f3e19..ba5c05e97f1 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -343,6 +343,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * flush_tlb_user() for both user and kernel mappings unless * the Page Global Enable (PGE) feature bit is set. */ *dx |= 0x00002000; + /* We also lie, and say we're family id 5. 6 or greater + * leads to a rdmsr in early_init_intel which we can't handle. + * Family ID is returned as bits 8-12 in ax. */ + *ax &= 0xFFFFF0FF; + *ax |= 0x00000500; break; case 0x80000000: /* Futureproof this a little: if they ask how much extended -- cgit v1.2.3-70-g09d2 From 6db6a5f3ae2ca6b874b0fd97ae16fdc9b5cdd6cc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 9 Mar 2009 10:06:28 -0600 Subject: lguest: fix for CONFIG_SPARSE_IRQ=y Impact: remove lots of lguest boot WARN_ON() when CONFIG_SPARSE_IRQ=y We now need to call irq_to_desc_alloc_cpu() before set_irq_chip_and_handler_name(), but we can't do that from init_IRQ (no kmalloc available). So do it as we use interrupts instead. Also means we only alloc for irqs we use, which was the intent of CONFIG_SPARSE_IRQ anyway. Signed-off-by: Rusty Russell Cc: Ingo Molnar --- arch/x86/lguest/boot.c | 16 +++++++++------- drivers/lguest/lguest_device.c | 6 ++++++ 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ba5c05e97f1..960a8d9c049 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -594,19 +594,21 @@ static void __init lguest_init_IRQ(void) /* Some systems map "vectors" to interrupts weirdly. Lguest has * a straightforward 1 to 1 mapping, so force that here. */ __get_cpu_var(vector_irq)[vector] = i; - if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, - interrupt[vector-FIRST_EXTERNAL_VECTOR]); - set_irq_chip_and_handler_name(i, &lguest_irq_controller, - handle_level_irq, - "level"); - } + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); } /* This call is required to set up for 4k stacks, where we have * separate stacks for hard and soft interrupts. */ irq_ctx_init(smp_processor_id()); } +void lguest_setup_irq(unsigned int irq) +{ + irq_to_desc_alloc_cpu(irq, 0); + set_irq_chip_and_handler_name(irq, &lguest_irq_controller, + handle_level_irq, "level"); +} + /* * Time. * diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index b4d44e571d7..8132533d71f 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -212,6 +212,9 @@ static void lg_notify(struct virtqueue *vq) hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0); } +/* An extern declaration inside a C file is bad form. Don't do it. */ +extern void lguest_setup_irq(unsigned int irq); + /* This routine finds the first virtqueue described in the configuration of * this device and sets it up. * @@ -266,6 +269,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, goto unmap; } + /* Make sure the interrupt is allocated. */ + lguest_setup_irq(lvq->config.irq); + /* Tell the interrupt for this virtqueue to go to the virtio_ring * interrupt handler. */ /* FIXME: We used to have a flag for the Host to tell us we could use -- cgit v1.2.3-70-g09d2 From 129f8ae9b1b5be94517da76009ea956e89104ce8 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 9 Mar 2009 15:07:33 -0400 Subject: Revert "[CPUFREQ] Disable sysfs ui for p4-clockmod." This reverts commit e088e4c9cdb618675874becb91b2fd581ee707e6. Removing the sysfs interface for p4-clockmod was flagged as a regression in bug 12826. Course of action: - Find out the remaining causes of overheating, and fix them if possible. ACPI should be doing the right thing automatically. If it isn't, we need to fix that. - mark p4-clockmod ui as deprecated - try again with the removal in six months. It's not really feasible to printk about the deprecation, because it needs to happen at all the sysfs entry points, which means adding a lot of strcmp("p4-clockmod".. calls to the core, which.. bleuch. Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 1 - drivers/cpufreq/cpufreq.c | 51 +++++++++++-------------------- include/linux/cpufreq.h | 1 - 3 files changed, 18 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b585e04cbc9..3178c3acd97 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = { .name = "p4-clockmod", .owner = THIS_MODULE, .attr = p4clockmod_attr, - .hide_interface = 1, }; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b55cb67435b..d6daf3c507d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -754,11 +754,6 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static struct kobj_type ktype_empty_cpufreq = { - .sysfs_ops = &sysfs_ops, - .release = cpufreq_sysfs_release, -}; - /** * cpufreq_add_dev - add a CPU device @@ -892,36 +887,26 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); /* prepare interface data */ - if (!cpufreq_driver->hide_interface) { - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, - &sys_dev->kobj, "cpufreq"); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, + "cpufreq"); + if (ret) + goto err_out_driver_exit; + + /* set up files for this cpu device */ + drv_attr = cpufreq_driver->attr; + while ((drv_attr) && (*drv_attr)) { + ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); if (ret) goto err_out_driver_exit; - - /* set up files for this cpu device */ - drv_attr = cpufreq_driver->attr; - while ((drv_attr) && (*drv_attr)) { - ret = sysfs_create_file(&policy->kobj, - &((*drv_attr)->attr)); - if (ret) - goto err_out_driver_exit; - drv_attr++; - } - if (cpufreq_driver->get) { - ret = sysfs_create_file(&policy->kobj, - &cpuinfo_cur_freq.attr); - if (ret) - goto err_out_driver_exit; - } - if (cpufreq_driver->target) { - ret = sysfs_create_file(&policy->kobj, - &scaling_cur_freq.attr); - if (ret) - goto err_out_driver_exit; - } - } else { - ret = kobject_init_and_add(&policy->kobj, &ktype_empty_cpufreq, - &sys_dev->kobj, "cpufreq"); + drv_attr++; + } + if (cpufreq_driver->get) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); + if (ret) + goto err_out_driver_exit; + } + if (cpufreq_driver->target) { + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) goto err_out_driver_exit; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 384b38d3e8e..161042746af 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -234,7 +234,6 @@ struct cpufreq_driver { int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; - bool hide_interface; }; /* flags */ -- cgit v1.2.3-70-g09d2 From f24ade3a3332811a512ed3b6c6aa69486719b1d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Mar 2009 19:02:30 +0100 Subject: x86, sched_clock(): mark variables read-mostly Impact: micro-optimization There's a number of variables in the sched_clock() path that are in .data/.bss - but not marked __read_mostly. This creates the danger of accidental false cacheline sharing with some other, write-often variable. So mark them __read_mostly. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e5816863..9c8b71531ca 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -17,20 +17,21 @@ #include #include -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); -unsigned int tsc_khz; + +unsigned int __read_mostly tsc_khz; EXPORT_SYMBOL(tsc_khz); /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ -static int tsc_unstable; +static int __read_mostly tsc_unstable; /* native_sched_clock() is called before tsc_init(), so we must start with the TSC soft disabled to prevent erroneous rdtsc usage on !cpu_has_tsc processors */ -static int tsc_disabled = -1; +static int __read_mostly tsc_disabled = -1; static int tsc_clocksource_reliable; /* -- cgit v1.2.3-70-g09d2 From 211b3d03c7400f48a781977a50104c9d12f4e229 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Mar 2009 22:31:03 +0100 Subject: x86: work around Fedora-11 x86-32 kernel failures on Intel Atom CPUs Impact: work around boot crash Work around Intel Atom erratum AAH41 (probabilistically) - it's triggering in the field. Reported-by: Linus Torvalds Tested-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7be47d1a97e..7233bd7e357 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -515,6 +515,17 @@ static int split_large_page(pte_t *kpte, unsigned long address) * primary protection behavior: */ __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); + + /* + * Intel Atom errata AAH41 workaround. + * + * The real fix should be in hw or in a microcode update, but + * we also probabilistically try to reduce the window of having + * a large TLB mixed with 4K TLBs while instruction fetches are + * going on. + */ + __flush_tlb_all(); + base = NULL; out_unlock: -- cgit v1.2.3-70-g09d2 From a6a80e1d8cf82b46a69f88e659da02749231eb36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 17 Mar 2009 07:58:26 -0700 Subject: Fix potential fast PIT TSC calibration startup glitch During bootup, when we reprogram the PIT (programmable interval timer) to start counting down from 0xffff in order to use it for the fast TSC calibration, we should also make sure to delay a bit afterwards to allow the PIT hardware to actually start counting with the new value. That will happens at the next CLK pulse (1.193182 MHz), so the easiest way to do that is to just wait at least one microsecond after programming the new PIT counter value. We do that by just reading the counter value back once - which will take about 2us on PC hardware. Reported-and-tested-by: john stultz Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e5816863..9e80207c96a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -315,6 +315,15 @@ static unsigned long quick_pit_calibrate(void) outb(0xff, 0x42); outb(0xff, 0x42); + /* + * The PIT starts counting at the next edge, so we + * need to delay for a microsecond. The easiest way + * to do that is to just read back the 16-bit counter + * once from the PIT. + */ + inb(0x42); + inb(0x42); + if (pit_expect_msb(0xff)) { int i; u64 t1, t2, delta; -- cgit v1.2.3-70-g09d2 From 9e8912e04e612b43897b4b722205408b92f423e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 17 Mar 2009 08:13:17 -0700 Subject: Fast TSC calibration: calculate proper frequency error bounds In order for ntpd to correctly synchronize the clocks, the frequency of the system clock must not be off by more than 500 ppm (or, put another way, 1:2000), or ntpd will end up giving up on trying to synchronize properly, and ends up reseting the clock in jumps instead. The fast TSC PIT calibration sometimes failed this test - it was assuming that the PIT reads always took about one microsecond each (2us for the two reads to get a 16-bit timer), and that calibrating TSC to the PIT over 15ms should thus be sufficient to get much closer than 500ppm (max 2us error on both sides giving 4us over 15ms: a 270 ppm error value). However, that assumption does not always hold: apparently some hardware is either very much slower at reading the PIT registers, or there was other noise causing at least one machine to get 700+ ppm errors. So instead of using a fixed 15ms timing loop, this changes the fast PIT calibration to read the TSC delta over the individual PIT timer reads, and use the result to calculate the error bars on the PIT read timing properly. We then successfully calibrate the TSC only if the maximum error bars fall below 500ppm. In the process, we also relax the timing to allow up to 25ms for the calibration, although it can happen much faster depending on hardware. Reported-and-tested-by: Jesper Krogh Cc: john stultz Cc: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/kernel/tsc.c | 101 ++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9e80207c96a..d5cebb52d45 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -273,30 +273,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequencty. */ -static inline int pit_expect_msb(unsigned char val) +static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { - int count = 0; + int count; + u64 tsc = 0; for (count = 0; count < 50000; count++) { /* Ignore LSB */ inb(0x42); if (inb(0x42) != val) break; + tsc = get_cycles(); } - return count > 50; + *deltap = get_cycles() - tsc; + *tscp = tsc; + + /* + * We require _some_ success, but the quality control + * will be based on the error terms on the TSC values. + */ + return count > 5; } /* - * How many MSB values do we want to see? We aim for a - * 15ms calibration, which assuming a 2us counter read - * error should give us roughly 150 ppm precision for - * the calibration. + * How many MSB values do we want to see? We aim for + * a maximum error rate of 500ppm (in practice the + * real error is much smaller), but refuse to spend + * more than 25ms on it. */ -#define QUICK_PIT_MS 15 -#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) +#define MAX_QUICK_PIT_MS 25 +#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) static unsigned long quick_pit_calibrate(void) { + int i; + u64 tsc, delta; + unsigned long d1, d2; + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); @@ -324,45 +337,43 @@ static unsigned long quick_pit_calibrate(void) inb(0x42); inb(0x42); - if (pit_expect_msb(0xff)) { - int i; - u64 t1, t2, delta; - unsigned char expect = 0xfe; - - t1 = get_cycles(); - for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { - if (!pit_expect_msb(expect)) - goto failed; + if (pit_expect_msb(0xff, &tsc, &d1)) { + for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { + if (!pit_expect_msb(0xff-i, &delta, &d2)) + break; + + /* + * Iterate until the error is less than 500 ppm + */ + delta -= tsc; + if (d1+d2 < delta >> 11) + goto success; } - t2 = get_cycles(); - - /* - * Make sure we can rely on the second TSC timestamp: - */ - if (!pit_expect_msb(expect)) - goto failed; - - /* - * Ok, if we get here, then we've seen the - * MSB of the PIT decrement QUICK_PIT_ITERATIONS - * times, and each MSB had many hits, so we never - * had any sudden jumps. - * - * As a result, we can depend on there not being - * any odd delays anywhere, and the TSC reads are - * reliable. - * - * kHz = ticks / time-in-seconds / 1000; - * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 - * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) - */ - delta = (t2 - t1)*PIT_TICK_RATE; - do_div(delta, QUICK_PIT_ITERATIONS*256*1000); - printk("Fast TSC calibration using PIT\n"); - return delta; } -failed: + printk("Fast TSC calibration failed\n"); return 0; + +success: + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement 'i' times, and the + * error has shrunk to less than 500 ppm. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable (within the error). We also adjust the + * delta to the middle of the error bars, just + * because it looks nicer. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000) + */ + delta += (long)(d2 - d1)/2; + delta *= PIT_TICK_RATE; + do_div(delta, i*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; } /** -- cgit v1.2.3-70-g09d2 From 30390880debce4a68fd23e87a787f27609e4bf4a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Mar 2009 18:57:22 -0400 Subject: prevent boosting kprobes on exception address Don't boost at the addresses which are listed on exception tables, because major page fault will occur on those addresses. In that case, kprobes can not ensure that when instruction buffer can be freed since some processes will sleep on the buffer. kprobes-ia64 already has same check. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Signed-off-by: Linus Torvalds --- arch/x86/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e948b28a5a9..4558dd3918c 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes) kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; + if (search_exception_tables(opcodes)) + return 0; /* Page fault may occur on this address. */ + retry: if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) return 0; -- cgit v1.2.3-70-g09d2 From f0b85051d0f0891378a83db22c0786f1d756fbff Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:01 +0100 Subject: KVM: SVM: Clean up VINTR setting The current VINTR intercept setters don't look clean to me. To make the code easier to read and enable the possibilty to trap on a VINTR set, this uses a helper function to set the VINTR intercept. v2 uses two distinct functions for setting and clearing the bit Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a9e769e4e25..33407d95761 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -718,6 +718,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } +static void svm_set_vintr(struct vcpu_svm *svm) +{ + svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR; +} + +static void svm_clear_vintr(struct vcpu_svm *svm) +{ + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); +} + static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) { struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; @@ -1380,7 +1390,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm, { KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); - svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); + svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* * If the user space waits to inject interrupts, exit as soon as @@ -1593,7 +1603,7 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { /* unable to deliver irq, set pending irq */ - vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); + svm_set_vintr(svm); svm_inject_irq(svm, 0x0); goto out; } @@ -1652,9 +1662,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, */ if (!svm->vcpu.arch.interrupt_window_open && (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) - control->intercept |= 1ULL << INTERCEPT_VINTR; - else - control->intercept &= ~(1ULL << INTERCEPT_VINTR); + svm_set_vintr(svm); + else + svm_clear_vintr(svm); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) -- cgit v1.2.3-70-g09d2 From 9962d032bbff0268f22068787831405f8468c8b4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:02 +0100 Subject: KVM: SVM: Move EFER and MSR constants to generic x86 code MSR_EFER_SVME_MASK, MSR_VM_CR and MSR_VM_HSAVE_PA are set in KVM specific headers. Linux does have nice header files to collect EFER bits and MSR IDs, so IMHO we should put them there. While at it, I also changed the naming scheme to match that of the other defines. (introduced in v6) Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/msr-index.h | 7 +++++++ arch/x86/include/asm/svm.h | 4 ---- arch/x86/include/asm/virtext.h | 2 +- arch/x86/kvm/svm.c | 6 +++--- 5 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 730843d1d2f..2998efe8927 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -22,6 +22,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 358acc59ae0..46e9646e7a6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -18,11 +18,13 @@ #define _EFER_LME 8 /* Long mode enable */ #define _EFER_LMA 10 /* Long mode active (read-only) */ #define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) #define EFER_LMA (1<<_EFER_LMA) #define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_PERFCTR0 0x000000c1 @@ -360,4 +362,9 @@ #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_HSAVE_PA 0xc0010117 + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1b8afa78e86..82ada75f3eb 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_CPUID_FEATURE_SHIFT 2 #define SVM_CPUID_FUNC 0x8000000a -#define MSR_EFER_SVME_MASK (1ULL << 12) -#define MSR_VM_CR 0xc0010114 -#define MSR_VM_HSAVE_PA 0xc0010117ULL - #define SVM_VM_CR_SVM_DISABLE 4 #define SVM_SELECTOR_S_SHIFT 4 diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 59363627523..e0f9aa16358 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void) wrmsrl(MSR_VM_HSAVE_PA, 0); rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); + wrmsrl(MSR_EFER, efer & ~EFER_SVME); } /** Makes sure SVM is disabled, if it is supported on the CPU diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 33407d95761..e4eb3fd91b9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -198,7 +198,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!npt_enabled && !(efer & EFER_LMA)) efer &= ~EFER_LME; - to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; + to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; vcpu->arch.shadow_efer = efer; } @@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage) svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); + wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); @@ -559,7 +559,7 @@ static void init_vmcb(struct vcpu_svm *svm) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - save->efer = MSR_EFER_SVME_MASK; + save->efer = EFER_SVME; save->dr6 = 0xffff0ff0; save->dr7 = 0x400; save->rflags = 2; -- cgit v1.2.3-70-g09d2 From c0725420cfdcf6dd9705b164a8c6cba86684080d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:03 +0100 Subject: KVM: SVM: Add helper functions for nested SVM These are helpers for the nested SVM implementation. - nsvm_printk implements a debug printk variant - nested_svm_do calls a handler that can accesses gpa-based memory v3 makes use of the new permission checker v6 changes: - streamline nsvm_debug() - remove printk(KERN_ERR) - SVME check before CPL check - give GP error code - use new EFER constant Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e4eb3fd91b9..87debdcd1b9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -50,6 +50,15 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +/* Turn on to get debugging output*/ +/* #define NESTED_DEBUG */ + +#ifdef NESTED_DEBUG +#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) +#else +#define nsvm_printk(fmt, args...) do {} while(0) +#endif + /* enable NPT for AMD64 and X86 with PAE */ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) static bool npt_enabled = true; @@ -1149,6 +1158,82 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int nested_svm_check_permissions(struct vcpu_svm *svm) +{ + if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) + || !is_paging(&svm->vcpu)) { + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + + if (svm->vmcb->save.cpl) { + kvm_inject_gp(&svm->vcpu, 0); + return 1; + } + + return 0; +} + +static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa) +{ + struct page *page; + + down_read(¤t->mm->mmap_sem); + page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + + if (is_error_page(page)) { + printk(KERN_INFO "%s: could not find page at 0x%llx\n", + __func__, gpa); + kvm_release_page_clean(page); + kvm_inject_gp(&svm->vcpu, 0); + return NULL; + } + return page; +} + +static int nested_svm_do(struct vcpu_svm *svm, + u64 arg1_gpa, u64 arg2_gpa, void *opaque, + int (*handler)(struct vcpu_svm *svm, + void *arg1, + void *arg2, + void *opaque)) +{ + struct page *arg1_page; + struct page *arg2_page = NULL; + void *arg1; + void *arg2 = NULL; + int retval; + + arg1_page = nested_svm_get_page(svm, arg1_gpa); + if(arg1_page == NULL) + return 1; + + if (arg2_gpa) { + arg2_page = nested_svm_get_page(svm, arg2_gpa); + if(arg2_page == NULL) { + kvm_release_page_clean(arg1_page); + return 1; + } + } + + arg1 = kmap_atomic(arg1_page, KM_USER0); + if (arg2_gpa) + arg2 = kmap_atomic(arg2_page, KM_USER1); + + retval = handler(svm, arg1, arg2, opaque); + + kunmap_atomic(arg1, KM_USER0); + if (arg2_gpa) + kunmap_atomic(arg2, KM_USER1); + + kvm_release_page_dirty(arg1_page); + if (arg2_gpa) + kvm_release_page_dirty(arg2_page); + + return retval; +} + static int invalid_op_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { -- cgit v1.2.3-70-g09d2 From 1371d90460189d02bf1bcca19dbfe6bd10dc6031 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:04 +0100 Subject: KVM: SVM: Implement GIF, clgi and stgi This patch implements the GIF flag and the clgi and stgi instructions that set this flag. Only if the flag is set (default), interrupts can be received by the CPU. To keep the information about that somewhere, this patch adds a new hidden flags vector. that is used to store information that does not go into the vmcb, but is SVM specific. I tried to write some code to make -no-kvm-irqchip work too, but the first level guest won't even boot with that atm, so I ditched it. v2 moves the hflags to x86 generic code v3 makes use of the new permission helper v6 only enables interrupt_window if GIF=1 Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2998efe8927..29e4157732d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -259,6 +259,7 @@ struct kvm_vcpu_arch { unsigned long cr3; unsigned long cr4; unsigned long cr8; + u32 hflags; u64 pdptrs[4]; /* pae */ u64 shadow_efer; u64 apic_base; @@ -738,6 +739,8 @@ enum { TASK_SWITCH_GATE = 3, }; +#define HF_GIF_MASK (1 << 0) + /* * Hardware virtualization extension instructions may fault if a * reboot turns off virtualization while processes are running. diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 87debdcd1b9..79cc06bfe57 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -251,7 +251,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, svm->next_rip); svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; - vcpu->arch.interrupt_window_open = 1; + vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK); } static int has_svm(void) @@ -600,6 +600,8 @@ static void init_vmcb(struct vcpu_svm *svm) save->cr4 = 0; } force_new_asid(&svm->vcpu); + + svm->vcpu.arch.hflags = HF_GIF_MASK; } static int svm_vcpu_reset(struct kvm_vcpu *vcpu) @@ -1234,6 +1236,36 @@ static int nested_svm_do(struct vcpu_svm *svm, return retval; } +static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + svm->vcpu.arch.hflags |= HF_GIF_MASK; + + return 1; +} + +static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + + /* After a CLGI no interrupts should come */ + svm_clear_vintr(svm); + svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + + return 1; +} + static int invalid_op_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { @@ -1535,8 +1567,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_VMMCALL] = vmmcall_interception, [SVM_EXIT_VMLOAD] = invalid_op_interception, [SVM_EXIT_VMSAVE] = invalid_op_interception, - [SVM_EXIT_STGI] = invalid_op_interception, - [SVM_EXIT_CLGI] = invalid_op_interception, + [SVM_EXIT_STGI] = stgi_interception, + [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = invalid_op_interception, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, @@ -1684,6 +1716,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) if (!kvm_cpu_has_interrupt(vcpu)) goto out; + if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) + goto out; + if (!(vmcb->save.rflags & X86_EFLAGS_IF) || (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { @@ -1710,7 +1745,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm) } svm->vcpu.arch.interrupt_window_open = - !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); + !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && + (svm->vcpu.arch.hflags & HF_GIF_MASK); } static void svm_do_inject_vector(struct vcpu_svm *svm) @@ -1734,7 +1770,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, svm->vcpu.arch.interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && - (svm->vmcb->save.rflags & X86_EFLAGS_IF)); + (svm->vmcb->save.rflags & X86_EFLAGS_IF) && + (svm->vcpu.arch.hflags & HF_GIF_MASK)); if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) /* -- cgit v1.2.3-70-g09d2 From b286d5d8b0836e76832dafcc5a18b0e8e5a3bc5e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:05 +0100 Subject: KVM: SVM: Implement hsave Implement the hsave MSR, that gives the VCPU a GPA to save the old guest state in. v2 allows userspace to save/restore hsave v4 dummys out the hsave MSR, so we use a host page v6 remembers the guest's hsave and exports the MSR Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/kvm_svm.h | 2 ++ arch/x86/kvm/svm.c | 13 +++++++++++++ arch/x86/kvm/x86.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 8e5ee99551f..a0877cac7b9 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -41,6 +41,8 @@ struct vcpu_svm { unsigned long host_dr7; u32 *msrpm; + struct vmcb *hsave; + u64 hsave_msr; }; #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 79cc06bfe57..59aaff1c959 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -626,6 +626,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct vcpu_svm *svm; struct page *page; struct page *msrpm_pages; + struct page *hsave_page; int err; svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); @@ -651,6 +652,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); + hsave_page = alloc_page(GFP_KERNEL); + if (!hsave_page) + goto uninit; + svm->hsave = page_address(hsave_page); + svm->vmcb = page_address(page); clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; @@ -680,6 +686,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); + __free_page(virt_to_page(svm->hsave)); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -1377,6 +1384,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_LASTINTTOIP: *data = svm->vmcb->save.last_excp_to; break; + case MSR_VM_HSAVE_PA: + *data = svm->hsave_msr; + break; default: return kvm_get_msr_common(vcpu, ecx, data); } @@ -1470,6 +1480,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) */ pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); + break; + case MSR_VM_HSAVE_PA: + svm->hsave_msr = data; break; default: return kvm_set_msr_common(vcpu, ecx, data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 758b7a155ae..99165a961f0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -456,7 +456,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT + MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; -- cgit v1.2.3-70-g09d2 From 5542675baa7e62ca4d18278c8758b6a4ec410639 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:06 +0100 Subject: KVM: SVM: Add VMLOAD and VMSAVE handlers This implements the VMLOAD and VMSAVE instructions, that usually surround the VMRUN instructions. Both instructions load / restore the same elements, so we only need to implement them once. v2 fixes CPL checking and replaces memcpy by assignments v3 makes use of the new permission checking Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 59aaff1c959..a83c94eb577 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1243,6 +1243,62 @@ static int nested_svm_do(struct vcpu_svm *svm, return retval; } +static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +{ + to_vmcb->save.fs = from_vmcb->save.fs; + to_vmcb->save.gs = from_vmcb->save.gs; + to_vmcb->save.tr = from_vmcb->save.tr; + to_vmcb->save.ldtr = from_vmcb->save.ldtr; + to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base; + to_vmcb->save.star = from_vmcb->save.star; + to_vmcb->save.lstar = from_vmcb->save.lstar; + to_vmcb->save.cstar = from_vmcb->save.cstar; + to_vmcb->save.sfmask = from_vmcb->save.sfmask; + to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; + to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; + to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; + + return 1; +} + +static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb, + void *arg2, void *opaque) +{ + return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb); +} + +static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb, + void *arg2, void *opaque) +{ + return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb); +} + +static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload); + + return 1; +} + +static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave); + + return 1; +} + static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (nested_svm_check_permissions(svm)) @@ -1578,8 +1634,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_SHUTDOWN] = shutdown_interception, [SVM_EXIT_VMRUN] = invalid_op_interception, [SVM_EXIT_VMMCALL] = vmmcall_interception, - [SVM_EXIT_VMLOAD] = invalid_op_interception, - [SVM_EXIT_VMSAVE] = invalid_op_interception, + [SVM_EXIT_VMLOAD] = vmload_interception, + [SVM_EXIT_VMSAVE] = vmsave_interception, [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = invalid_op_interception, -- cgit v1.2.3-70-g09d2 From 3d6368ef580a4dff012960834bba4e28d3c1430c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:07 +0100 Subject: KVM: SVM: Add VMRUN handler This patch implements VMRUN. VMRUN enters a virtual CPU and runs that in the same context as the normal guest CPU would run. So basically it is implemented the same way, a normal CPU would do it. We also prepare all intercepts that get OR'ed with the original intercepts, as we do not allow a level 2 guest to be intercepted less than the first level guest. v2 implements the following improvements: - fixes the CPL check - does not allocate iopm when not used - remembers the host's IF in the HIF bit in the hflags v3: - make use of the new permission checking - add support for V_INTR_MASKING_MASK v4: - use host page backed hsave v5: - remove IOPM merging code v6: - save cr4 so PAE l1 guests work v7: - return 0 on vmrun so we check the MSRs too - fix MSR check to use the correct variable Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/kvm_svm.h | 8 ++ arch/x86/kvm/svm.c | 157 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 165 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 29e4157732d..53779309514 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -740,6 +740,8 @@ enum { }; #define HF_GIF_MASK (1 << 0) +#define HF_HIF_MASK (1 << 1) +#define HF_VINTR_MASK (1 << 2) /* * Hardware virtualization extension instructions may fault if a diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index a0877cac7b9..91673413d8f 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -43,6 +43,14 @@ struct vcpu_svm { u32 *msrpm; struct vmcb *hsave; u64 hsave_msr; + + u64 nested_vmcb; + + /* These are the merged vectors */ + u32 *nested_msrpm; + + /* gpa pointers to the real vectors */ + u64 nested_vmcb_msrpm; }; #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a83c94eb577..fad187cbfab 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -77,6 +77,11 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_svm, vcpu); } +static inline bool is_nested(struct vcpu_svm *svm) +{ + return svm->nested_vmcb; +} + static unsigned long iopm_base; struct kvm_ldttss_desc { @@ -601,6 +606,7 @@ static void init_vmcb(struct vcpu_svm *svm) } force_new_asid(&svm->vcpu); + svm->nested_vmcb = 0; svm->vcpu.arch.hflags = HF_GIF_MASK; } @@ -627,6 +633,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *page; struct page *msrpm_pages; struct page *hsave_page; + struct page *nested_msrpm_pages; int err; svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); @@ -649,6 +656,11 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) goto uninit; + + nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + if (!nested_msrpm_pages) + goto uninit; + svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); @@ -657,6 +669,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto uninit; svm->hsave = page_address(hsave_page); + svm->nested_msrpm = page_address(nested_msrpm_pages); + svm->vmcb = page_address(page); clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; @@ -687,6 +701,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->hsave)); + __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -1243,6 +1258,123 @@ static int nested_svm_do(struct vcpu_svm *svm, return retval; } + +static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + int i; + u32 *nested_msrpm = (u32*)arg1; + for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) + svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; + svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm); + + return 0; +} + +static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + struct vmcb *hsave = svm->hsave; + + /* nested_vmcb is our indicator if nested SVM is activated */ + svm->nested_vmcb = svm->vmcb->save.rax; + + /* Clear internal status */ + svm->vcpu.arch.exception.pending = false; + + /* Save the old vmcb, so we don't need to pick what we save, but + can restore everything when a VMEXIT occurs */ + memcpy(hsave, svm->vmcb, sizeof(struct vmcb)); + /* We need to remember the original CR3 in the SPT case */ + if (!npt_enabled) + hsave->save.cr3 = svm->vcpu.arch.cr3; + hsave->save.cr4 = svm->vcpu.arch.cr4; + hsave->save.rip = svm->next_rip; + + if (svm->vmcb->save.rflags & X86_EFLAGS_IF) + svm->vcpu.arch.hflags |= HF_HIF_MASK; + else + svm->vcpu.arch.hflags &= ~HF_HIF_MASK; + + /* Load the nested guest state */ + svm->vmcb->save.es = nested_vmcb->save.es; + svm->vmcb->save.cs = nested_vmcb->save.cs; + svm->vmcb->save.ss = nested_vmcb->save.ss; + svm->vmcb->save.ds = nested_vmcb->save.ds; + svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; + svm->vmcb->save.idtr = nested_vmcb->save.idtr; + svm->vmcb->save.rflags = nested_vmcb->save.rflags; + svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); + svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); + svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); + if (npt_enabled) { + svm->vmcb->save.cr3 = nested_vmcb->save.cr3; + svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; + } else { + kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); + kvm_mmu_reset_context(&svm->vcpu); + } + svm->vmcb->save.cr2 = nested_vmcb->save.cr2; + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); + kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); + kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); + /* In case we don't even reach vcpu_run, the fields are not updated */ + svm->vmcb->save.rax = nested_vmcb->save.rax; + svm->vmcb->save.rsp = nested_vmcb->save.rsp; + svm->vmcb->save.rip = nested_vmcb->save.rip; + svm->vmcb->save.dr7 = nested_vmcb->save.dr7; + svm->vmcb->save.dr6 = nested_vmcb->save.dr6; + svm->vmcb->save.cpl = nested_vmcb->save.cpl; + + /* We don't want a nested guest to be more powerful than the guest, + so all intercepts are ORed */ + svm->vmcb->control.intercept_cr_read |= + nested_vmcb->control.intercept_cr_read; + svm->vmcb->control.intercept_cr_write |= + nested_vmcb->control.intercept_cr_write; + svm->vmcb->control.intercept_dr_read |= + nested_vmcb->control.intercept_dr_read; + svm->vmcb->control.intercept_dr_write |= + nested_vmcb->control.intercept_dr_write; + svm->vmcb->control.intercept_exceptions |= + nested_vmcb->control.intercept_exceptions; + + svm->vmcb->control.intercept |= nested_vmcb->control.intercept; + + svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; + + force_new_asid(&svm->vcpu); + svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; + svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; + svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; + if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { + nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", + nested_vmcb->control.int_ctl); + } + if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) + svm->vcpu.arch.hflags |= HF_VINTR_MASK; + else + svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + + nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", + nested_vmcb->control.exit_int_info, + nested_vmcb->control.int_state); + + svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; + svm->vmcb->control.int_state = nested_vmcb->control.int_state; + svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; + if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) + nsvm_printk("Injecting Event: 0x%x\n", + nested_vmcb->control.event_inj); + svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; + svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + + svm->vcpu.arch.hflags |= HF_GIF_MASK; + + return 0; +} + static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; @@ -1299,6 +1431,26 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + nsvm_printk("VMrun\n"); + if (nested_svm_check_permissions(svm)) + return 1; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; + skip_emulated_instruction(&svm->vcpu); + + if (nested_svm_do(svm, svm->vmcb->save.rax, 0, + NULL, nested_svm_vmrun)) + return 1; + + if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0, + NULL, nested_svm_vmrun_msrpm)) + return 1; + + return 1; +} + static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (nested_svm_check_permissions(svm)) @@ -1632,7 +1784,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, [SVM_EXIT_SHUTDOWN] = shutdown_interception, - [SVM_EXIT_VMRUN] = invalid_op_interception, + [SVM_EXIT_VMRUN] = vmrun_interception, [SVM_EXIT_VMMCALL] = vmmcall_interception, [SVM_EXIT_VMLOAD] = vmload_interception, [SVM_EXIT_VMSAVE] = vmsave_interception, @@ -1939,7 +2091,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) svm->host_cr2 = kvm_read_cr2(); svm->host_dr6 = read_dr6(); svm->host_dr7 = read_dr7(); - svm->vmcb->save.cr2 = vcpu->arch.cr2; + if (!is_nested(svm)) + svm->vmcb->save.cr2 = vcpu->arch.cr2; /* required for live migration with NPT */ if (npt_enabled) svm->vmcb->save.cr3 = vcpu->arch.cr3; -- cgit v1.2.3-70-g09d2 From cf74a78b229d07f77416d2fe1f029f183a8a31df Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:08 +0100 Subject: KVM: SVM: Add VMEXIT handler and intercepts This adds the #VMEXIT intercept, so we return to the level 1 guest when something happens in the level 2 guest that should return to the level 1 guest. v2 implements HIF handling and cleans up exception interception v3 adds support for V_INTR_MASKING_MASK v4 uses the host page hsave v5 removes IOPM merging code v6 moves mmu code out of the atomic section Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fad187cbfab..4cb2920b152 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -72,6 +72,13 @@ module_param(npt, int, S_IRUGO); static void kvm_reput_irq(struct vcpu_svm *svm); static void svm_flush_tlb(struct kvm_vcpu *vcpu); +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); +static int nested_svm_vmexit(struct vcpu_svm *svm); +static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb, + void *arg2, void *opaque); +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, + bool has_error_code, u32 error_code); + static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_svm, vcpu); @@ -221,6 +228,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, { struct vcpu_svm *svm = to_svm(vcpu); + /* If we are within a nested VM we'd better #VMEXIT and let the + guest handle the exception */ + if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) + return; + svm->vmcb->control.event_inj = nr | SVM_EVTINJ_VALID | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) @@ -1198,6 +1210,46 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) return 0; } +static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, + bool has_error_code, u32 error_code) +{ + if (is_nested(svm)) { + svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = error_code; + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + if (nested_svm_exit_handled(svm, false)) { + nsvm_printk("VMexit -> EXCP 0x%x\n", nr); + + nested_svm_vmexit(svm); + return 1; + } + } + + return 0; +} + +static inline int nested_svm_intr(struct vcpu_svm *svm) +{ + if (is_nested(svm)) { + if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) + return 0; + + if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) + return 0; + + svm->vmcb->control.exit_code = SVM_EXIT_INTR; + + if (nested_svm_exit_handled(svm, false)) { + nsvm_printk("VMexit -> INTR\n"); + nested_svm_vmexit(svm); + return 1; + } + } + + return 0; +} + static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa) { struct page *page; @@ -1258,6 +1310,228 @@ static int nested_svm_do(struct vcpu_svm *svm, return retval; } +static int nested_svm_exit_handled_real(struct vcpu_svm *svm, + void *arg1, + void *arg2, + void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + bool kvm_overrides = *(bool *)opaque; + u32 exit_code = svm->vmcb->control.exit_code; + + if (kvm_overrides) { + switch (exit_code) { + case SVM_EXIT_INTR: + case SVM_EXIT_NMI: + return 0; + /* For now we are always handling NPFs when using them */ + case SVM_EXIT_NPF: + if (npt_enabled) + return 0; + break; + /* When we're shadowing, trap PFs */ + case SVM_EXIT_EXCP_BASE + PF_VECTOR: + if (!npt_enabled) + return 0; + break; + default: + break; + } + } + + switch (exit_code) { + case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { + u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); + if (nested_vmcb->control.intercept_cr_read & cr_bits) + return 1; + break; + } + case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: { + u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0); + if (nested_vmcb->control.intercept_cr_write & cr_bits) + return 1; + break; + } + case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: { + u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0); + if (nested_vmcb->control.intercept_dr_read & dr_bits) + return 1; + break; + } + case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: { + u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0); + if (nested_vmcb->control.intercept_dr_write & dr_bits) + return 1; + break; + } + case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { + u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); + if (nested_vmcb->control.intercept_exceptions & excp_bits) + return 1; + break; + } + default: { + u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); + nsvm_printk("exit code: 0x%x\n", exit_code); + if (nested_vmcb->control.intercept & exit_bits) + return 1; + } + } + + return 0; +} + +static int nested_svm_exit_handled_msr(struct vcpu_svm *svm, + void *arg1, void *arg2, + void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + u8 *msrpm = (u8 *)arg2; + u32 t0, t1; + u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + u32 param = svm->vmcb->control.exit_info_1 & 1; + + if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT))) + return 0; + + switch(msr) { + case 0 ... 0x1fff: + t0 = (msr * 2) % 8; + t1 = msr / 8; + break; + case 0xc0000000 ... 0xc0001fff: + t0 = (8192 + msr - 0xc0000000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + case 0xc0010000 ... 0xc0011fff: + t0 = (16384 + msr - 0xc0010000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + default: + return 1; + break; + } + if (msrpm[t1] & ((1 << param) << t0)) + return 1; + + return 0; +} + +static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override) +{ + bool k = kvm_override; + + switch (svm->vmcb->control.exit_code) { + case SVM_EXIT_MSR: + return nested_svm_do(svm, svm->nested_vmcb, + svm->nested_vmcb_msrpm, NULL, + nested_svm_exit_handled_msr); + default: break; + } + + return nested_svm_do(svm, svm->nested_vmcb, 0, &k, + nested_svm_exit_handled_real); +} + +static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1, + void *arg2, void *opaque) +{ + struct vmcb *nested_vmcb = (struct vmcb *)arg1; + struct vmcb *hsave = svm->hsave; + u64 nested_save[] = { nested_vmcb->save.cr0, + nested_vmcb->save.cr3, + nested_vmcb->save.cr4, + nested_vmcb->save.efer, + nested_vmcb->control.intercept_cr_read, + nested_vmcb->control.intercept_cr_write, + nested_vmcb->control.intercept_dr_read, + nested_vmcb->control.intercept_dr_write, + nested_vmcb->control.intercept_exceptions, + nested_vmcb->control.intercept, + nested_vmcb->control.msrpm_base_pa, + nested_vmcb->control.iopm_base_pa, + nested_vmcb->control.tsc_offset }; + + /* Give the current vmcb to the guest */ + memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb)); + nested_vmcb->save.cr0 = nested_save[0]; + if (!npt_enabled) + nested_vmcb->save.cr3 = nested_save[1]; + nested_vmcb->save.cr4 = nested_save[2]; + nested_vmcb->save.efer = nested_save[3]; + nested_vmcb->control.intercept_cr_read = nested_save[4]; + nested_vmcb->control.intercept_cr_write = nested_save[5]; + nested_vmcb->control.intercept_dr_read = nested_save[6]; + nested_vmcb->control.intercept_dr_write = nested_save[7]; + nested_vmcb->control.intercept_exceptions = nested_save[8]; + nested_vmcb->control.intercept = nested_save[9]; + nested_vmcb->control.msrpm_base_pa = nested_save[10]; + nested_vmcb->control.iopm_base_pa = nested_save[11]; + nested_vmcb->control.tsc_offset = nested_save[12]; + + /* We always set V_INTR_MASKING and remember the old value in hflags */ + if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) + nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; + + if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) && + (nested_vmcb->control.int_vector)) { + nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n", + nested_vmcb->control.int_vector); + } + + /* Restore the original control entries */ + svm->vmcb->control = hsave->control; + + /* Kill any pending exceptions */ + if (svm->vcpu.arch.exception.pending == true) + nsvm_printk("WARNING: Pending Exception\n"); + svm->vcpu.arch.exception.pending = false; + + /* Restore selected save entries */ + svm->vmcb->save.es = hsave->save.es; + svm->vmcb->save.cs = hsave->save.cs; + svm->vmcb->save.ss = hsave->save.ss; + svm->vmcb->save.ds = hsave->save.ds; + svm->vmcb->save.gdtr = hsave->save.gdtr; + svm->vmcb->save.idtr = hsave->save.idtr; + svm->vmcb->save.rflags = hsave->save.rflags; + svm_set_efer(&svm->vcpu, hsave->save.efer); + svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); + svm_set_cr4(&svm->vcpu, hsave->save.cr4); + if (npt_enabled) { + svm->vmcb->save.cr3 = hsave->save.cr3; + svm->vcpu.arch.cr3 = hsave->save.cr3; + } else { + kvm_set_cr3(&svm->vcpu, hsave->save.cr3); + } + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); + kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); + kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); + svm->vmcb->save.dr7 = 0; + svm->vmcb->save.cpl = 0; + svm->vmcb->control.exit_int_info = 0; + + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + /* Exit nested SVM mode */ + svm->nested_vmcb = 0; + + return 0; +} + +static int nested_svm_vmexit(struct vcpu_svm *svm) +{ + nsvm_printk("VMexit\n"); + if (nested_svm_do(svm, svm->nested_vmcb, 0, + NULL, nested_svm_vmexit_real)) + return 1; + + kvm_mmu_reset_context(&svm->vcpu); + kvm_mmu_load(&svm->vcpu); + + return 0; +} static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1, void *arg2, void *opaque) @@ -1805,6 +2079,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); + if (is_nested(svm)) { + nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", + exit_code, svm->vmcb->control.exit_info_1, + svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); + if (nested_svm_exit_handled(svm, true)) { + nested_svm_vmexit(svm); + nsvm_printk("-> #VMEXIT\n"); + return 1; + } + } + if (npt_enabled) { int mmu_reload = 0; if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { @@ -1892,6 +2177,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_svm *svm = to_svm(vcpu); + nested_svm_intr(svm); + svm_inject_irq(svm, irq); } @@ -1937,6 +2224,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) if (!kvm_cpu_has_interrupt(vcpu)) goto out; + if (nested_svm_intr(svm)) + goto out; + if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) goto out; @@ -1989,6 +2279,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; + if (nested_svm_intr(svm)) + return; + svm->vcpu.arch.interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && (svm->vmcb->save.rflags & X86_EFLAGS_IF) && -- cgit v1.2.3-70-g09d2 From eb6f302edff9da963e687bf6c15dcf88843b1c3b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 25 Nov 2008 20:17:09 +0100 Subject: KVM: SVM: Allow read access to MSR_VM_VR KVM tries to read the VM_CR MSR to find out if SVM was disabled by the BIOS. So implement read support for this MSR to make nested SVM running. Signed-off-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4cb2920b152..df5b4119266 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1869,6 +1869,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_VM_HSAVE_PA: *data = svm->hsave_msr; break; + case MSR_VM_CR: + *data = 0; + break; default: return kvm_get_msr_common(vcpu, ecx, data); } -- cgit v1.2.3-70-g09d2 From 236de05553a7ef8f6940de8686ae9bf1272cd2cf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:10 +0100 Subject: KVM: SVM: Allow setting the SVME bit Normally setting the SVME bit in EFER is not allowed, as we did not support SVM. Not since we do, we should also allow enabling SVM mode. v2 comes as last patch, so we don't enable half-ready code v4 introduces a module option to enable SVM v6 warns that nesting is enabled Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index df5b4119266..0fbbde54eca 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -69,6 +69,9 @@ static int npt = 1; module_param(npt, int, S_IRUGO); +static int nested = 0; +module_param(nested, int, S_IRUGO); + static void kvm_reput_irq(struct vcpu_svm *svm); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -443,6 +446,11 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); + if (nested) { + printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); + kvm_enable_efer_bits(EFER_SVME); + } + for_each_online_cpu(cpu) { r = svm_cpu_init(cpu); if (r) -- cgit v1.2.3-70-g09d2 From d80174745ba3938bc6abb8f95ed670ac0631a182 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 25 Nov 2008 20:17:11 +0100 Subject: KVM: SVM: Only allow setting of EFER_SVME when CPUID SVM is set Userspace has to tell the kernel module somehow that nested SVM should be used. The easiest way that doesn't break anything I could think of is to implement if (cpuid & svm) allow write to efer else deny write to efer Old userspaces mask the SVM capability bit, so they don't break. In order to find out that the SVM capability is set, I had to split the kvm_emulate_cpuid into a finding and an emulating part. (introduced in v6) Acked-by: Joerg Roedel Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 99165a961f0..b5e9932e0f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -69,6 +69,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + u32 function, u32 index); struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -173,6 +175,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 error_code) { ++vcpu->stat.pf_guest; + if (vcpu->arch.exception.pending) { if (vcpu->arch.exception.nr == PF_VECTOR) { printk(KERN_DEBUG "kvm: inject_page_fault:" @@ -442,6 +445,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. @@ -481,6 +489,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) return; } + if (efer & EFER_SVME) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { + printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); + kvm_inject_gp(vcpu, 0); + return; + } + } + kvm_x86_ops->set_efer(vcpu, efer); efer &= ~EFER_LMA; @@ -1181,11 +1200,6 @@ out: return r; } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, u32 index) { @@ -1228,7 +1242,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word3_x86_features = bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); const u32 kvm_supported_word6_x86_features = - bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); + bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | + bit(X86_FEATURE_SVM); /* all func 2 cpuid_count() should be called on the same cpu */ get_cpu(); @@ -2832,20 +2847,15 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, return 1; } -void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + u32 function, u32 index) { int i; - u32 function, index; - struct kvm_cpuid_entry2 *e, *best; + struct kvm_cpuid_entry2 *best = NULL; - function = kvm_register_read(vcpu, VCPU_REGS_RAX); - index = kvm_register_read(vcpu, VCPU_REGS_RCX); - kvm_register_write(vcpu, VCPU_REGS_RAX, 0); - kvm_register_write(vcpu, VCPU_REGS_RBX, 0); - kvm_register_write(vcpu, VCPU_REGS_RCX, 0); - kvm_register_write(vcpu, VCPU_REGS_RDX, 0); - best = NULL; for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { + struct kvm_cpuid_entry2 *e; + e = &vcpu->arch.cpuid_entries[i]; if (is_matching_cpuid_entry(e, function, index)) { if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) @@ -2860,6 +2870,22 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) if (!best || e->function > best->function) best = e; } + + return best; +} + +void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) +{ + u32 function, index; + struct kvm_cpuid_entry2 *best; + + function = kvm_register_read(vcpu, VCPU_REGS_RAX); + index = kvm_register_read(vcpu, VCPU_REGS_RCX); + kvm_register_write(vcpu, VCPU_REGS_RAX, 0); + kvm_register_write(vcpu, VCPU_REGS_RBX, 0); + kvm_register_write(vcpu, VCPU_REGS_RCX, 0); + kvm_register_write(vcpu, VCPU_REGS_RDX, 0); + best = kvm_find_cpuid_entry(vcpu, function, index); if (best) { kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); -- cgit v1.2.3-70-g09d2 From 8ab2d2e231062814bd89bba2d6d92563190aa2bb Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: VMX: Support for injecting software exceptions VMX differentiates between processor and software generated exceptions when injecting them into the guest. Extend vmx_queue_exception accordingly (and refactor related constants) so that we can use this service reliably for the new guest debugging framework. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 3 ++- arch/x86/kvm/vmx.c | 35 ++++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d0238e6151d..32159f034ef 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -270,8 +270,9 @@ enum vmcs_field { #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ -#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ +#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7611af57682..1d974c1eaa7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -189,21 +189,21 @@ static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_no_device(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_invalid_opcode(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_external_interrupt(u32 intr_info) @@ -747,29 +747,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 intr_info = nr | INTR_INFO_VALID_MASK; - if (has_error_code) + if (has_error_code) { vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } if (vcpu->arch.rmode.active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = nr; vmx->rmode.irq.rip = kvm_rip_read(vcpu); - if (nr == BP_VECTOR) + if (nr == BP_VECTOR || nr == OF_VECTOR) vmx->rmode.irq.rip++; - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - nr | INTR_TYPE_SOFT_INTR - | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) - | INTR_INFO_VALID_MASK); + intr_info |= INTR_TYPE_SOFT_INTR; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); return; } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - nr | INTR_TYPE_EXCEPTION - | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) - | INTR_INFO_VALID_MASK); + if (nr == BP_VECTOR || nr == OF_VECTOR) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + } else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); } static bool vmx_exception_injected(struct kvm_vcpu *vcpu) @@ -2650,7 +2654,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == - (INTR_TYPE_EXCEPTION | 1)) { + (INTR_TYPE_HARD_EXCEPTION | 1)) { kvm_run->exit_reason = KVM_EXIT_DEBUG; return 0; } @@ -3238,7 +3242,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) vmx->vcpu.arch.nmi_injected = false; } kvm_clear_exception_queue(&vmx->vcpu); - if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { + if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || + type == INTR_TYPE_SOFT_EXCEPTION)) { if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { error = vmcs_read32(IDT_VECTORING_ERROR_CODE); kvm_queue_exception_e(&vmx->vcpu, vector, error); -- cgit v1.2.3-70-g09d2 From d0bfb940ecabf0b44fb1fd80d8d60594e569e5ec Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: New guest debug interface This rips out the support for KVM_DEBUG_GUEST and introduces a new IOCTL instead: KVM_SET_GUEST_DEBUG. The IOCTL payload consists of a generic part, controlling the "main switch" and the single-step feature. The arch specific part adds an x86 interface for intercepting both types of debug exceptions separately and re-injecting them when the host was not interested. Moveover, the foundation for guest debugging via debug registers is layed. To signal breakpoint events properly back to userland, an arch-specific data block is now returned along KVM_EXIT_DEBUG. For x86, the arch block contains the PC, the debug exception, and relevant debug registers to tell debug events properly apart. The availability of this new interface is signaled by KVM_CAP_SET_GUEST_DEBUG. Empty stubs for not yet supported archs are provided. Note that both SVM and VTX are supported, but only the latter was tested yet. Based on the experience with all those VTX corner case, I would be fairly surprised if SVM will work out of the box. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 7 ++++ arch/ia64/kvm/kvm-ia64.c | 4 +- arch/powerpc/include/asm/kvm.h | 7 ++++ arch/powerpc/kvm/powerpc.c | 4 +- arch/s390/include/asm/kvm.h | 7 ++++ arch/s390/kvm/kvm-s390.c | 4 +- arch/x86/include/asm/kvm.h | 18 ++++++++ arch/x86/include/asm/kvm_host.h | 9 +--- arch/x86/kvm/svm.c | 50 +++++++++++++++++++++- arch/x86/kvm/vmx.c | 93 ++++++++++++++++------------------------- arch/x86/kvm/x86.c | 14 ++++--- include/linux/kvm.h | 51 +++++++++++++++------- include/linux/kvm_host.h | 6 +-- virt/kvm/kvm_main.c | 6 +-- 14 files changed, 179 insertions(+), 101 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index bfa86b6af7c..be3fdb89121 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -214,4 +214,11 @@ struct kvm_sregs { struct kvm_fpu { }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 28f982045f2..de47467a0e6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1303,8 +1303,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -EINVAL; } diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f993e4198d5..755f1b1948c 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -52,4 +52,11 @@ struct kvm_fpu { __u64 fpr[32]; }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5f81256287f..7c2ad4017d6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -240,8 +240,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_core_vcpu_put(vcpu); } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { int i; diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index e1f54654e3a..0b2f829f6d5 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -42,4 +42,11 @@ struct kvm_fpu { __u64 fprs[16]; }; +struct kvm_debug_exit_arch { +}; + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { +}; + #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0d33893e1e8..cbfe91e1012 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -422,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { return -EINVAL; /* not implemented yet */ } diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 886c9402ec4..32eb96c7ca2 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -212,6 +212,24 @@ struct kvm_pit_channel_state { __s64 count_load_time; }; +struct kvm_debug_exit_arch { + __u32 exception; + __u32 pad; + __u64 pc; + __u64 dr6; + __u64 dr7; +}; + +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 +#define KVM_GUESTDBG_INJECT_DB 0x00040000 +#define KVM_GUESTDBG_INJECT_BP 0x00080000 + +/* for KVM_SET_GUEST_DEBUG */ +struct kvm_guest_debug_arch { + __u64 debugreg[8]; +}; + struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53779309514..c430cd580ee 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -135,12 +135,6 @@ enum { #define KVM_NR_MEM_OBJS 40 -struct kvm_guest_debug { - int enabled; - unsigned long bp[4]; - int singlestep; -}; - /* * We don't want allocation failures within the mmu code, so we preallocate * enough memory for a single page fault in a cache. @@ -448,8 +442,7 @@ struct kvm_x86_ops { void (*vcpu_put)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg); - void (*guest_debug_pre)(struct kvm_vcpu *vcpu); + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0fbbde54eca..88d9062f454 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -968,9 +968,32 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EOPNOTSUPP; + int old_debug = vcpu->guest_debug; + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->guest_debug = dbg->control; + + svm->vmcb->control.intercept_exceptions &= + ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + svm->vmcb->control.intercept_exceptions |= + 1 << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + svm->vmcb->control.intercept_exceptions |= + 1 << BP_VECTOR; + } else + vcpu->guest_debug = 0; + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) + svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + + return 0; } static int svm_get_irq(struct kvm_vcpu *vcpu) @@ -1094,6 +1117,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } +static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + if (!(svm->vcpu.guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + kvm_queue_exception(&svm->vcpu, DB_VECTOR); + return 1; + } + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = DB_VECTOR; + return 0; +} + +static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = BP_VECTOR; + return 0; +} + static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { int er; @@ -2050,6 +2094,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_WRITE_DR3] = emulate_on_interception, [SVM_EXIT_WRITE_DR5] = emulate_on_interception, [SVM_EXIT_WRITE_DR7] = emulate_on_interception, + [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, + [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d974c1eaa7..f55690ddb3a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -480,8 +480,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); if (!vcpu->fpu_active) eb |= 1u << NM_VECTOR; - if (vcpu->guest_debug.enabled) - eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + eb |= 1u << BP_VECTOR; + } if (vcpu->arch.rmode.active) eb = ~0; if (vm_need_ept()) @@ -1003,40 +1008,23 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - unsigned long dr7 = 0x400; - int old_singlestep; - - old_singlestep = vcpu->guest_debug.singlestep; - - vcpu->guest_debug.enabled = dbg->enabled; - if (vcpu->guest_debug.enabled) { - int i; - - dr7 |= 0x200; /* exact */ - for (i = 0; i < 4; ++i) { - if (!dbg->breakpoints[i].enabled) - continue; - vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; - dr7 |= 2 << (i*2); /* global enable */ - dr7 |= 0 << (i*4+16); /* execution breakpoint */ - } - - vcpu->guest_debug.singlestep = dbg->singlestep; - } else - vcpu->guest_debug.singlestep = 0; + int old_debug = vcpu->guest_debug; + unsigned long flags; - if (old_singlestep && !vcpu->guest_debug.singlestep) { - unsigned long flags; + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) + vcpu->guest_debug = 0; - flags = vmcs_readl(GUEST_RFLAGS); + flags = vmcs_readl(GUEST_RFLAGS); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - vmcs_writel(GUEST_RFLAGS, flags); - } + vmcs_writel(GUEST_RFLAGS, flags); update_exception_bitmap(vcpu); - vmcs_writel(GUEST_DR7, dr7); return 0; } @@ -2540,24 +2528,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - - set_debugreg(dbg->bp[0], 0); - set_debugreg(dbg->bp[1], 1); - set_debugreg(dbg->bp[2], 2); - set_debugreg(dbg->bp[3], 3); - - if (dbg->singlestep) { - unsigned long flags; - - flags = vmcs_readl(GUEST_RFLAGS); - flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - vmcs_writel(GUEST_RFLAGS, flags); - } -} - static int handle_rmode_exception(struct kvm_vcpu *vcpu, int vec, u32 err_code) { @@ -2574,9 +2544,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * the required debugging infrastructure rework. */ switch (vec) { - case DE_VECTOR: case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return 0; + kvm_queue_exception(vcpu, vec); + return 1; case BP_VECTOR: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return 0; + /* fall through */ + case DE_VECTOR: case OF_VECTOR: case BR_VECTOR: case UD_VECTOR: @@ -2593,7 +2571,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 intr_info, error_code; + u32 intr_info, ex_no, error_code; unsigned long cr2, rip; u32 vect_info; enum emulation_result er; @@ -2653,14 +2631,16 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } - if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | 1)) { + ex_no = intr_info & INTR_INFO_VECTOR_MASK; + if (ex_no == DB_VECTOR || ex_no == BP_VECTOR) { kvm_run->exit_reason = KVM_EXIT_DEBUG; - return 0; + kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.exception = ex_no; + } else { + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = ex_no; + kvm_run->ex.error_code = error_code; } - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; - kvm_run->ex.error_code = error_code; return 0; } @@ -3600,7 +3580,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_put = vmx_vcpu_put, .set_guest_debug = set_guest_debug, - .guest_debug_pre = kvm_guest_debug_pre, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5e9932e0f6..e990d164b56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3005,9 +3005,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - if (vcpu->guest_debug.enabled) - kvm_x86_ops->guest_debug_pre(vcpu); - vcpu->guest_mode = 1; /* * Make sure that guest_mode assignment won't happen after @@ -3218,7 +3215,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) /* * Don't leak debug flags in case they were set for guest debugging */ - if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); vcpu_put(vcpu); @@ -3837,8 +3834,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) { int r; @@ -3846,6 +3843,11 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + if (dbg->control & KVM_GUESTDBG_INJECT_DB) + kvm_queue_exception(vcpu, DB_VECTOR); + else if (dbg->control & KVM_GUESTDBG_INJECT_BP) + kvm_queue_exception(vcpu, BP_VECTOR); + vcpu_put(vcpu); return r; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0424326f167..429a2ce202f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -126,6 +126,7 @@ struct kvm_run { __u64 data_offset; /* relative to kvm_run start */ } io; struct { + struct kvm_debug_exit_arch arch; } debug; /* KVM_EXIT_MMIO */ struct { @@ -217,21 +218,6 @@ struct kvm_interrupt { __u32 irq; }; -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -/* for KVM_DEBUG_GUEST */ -struct kvm_debug_guest { - /* int */ - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - /* for KVM_GET_DIRTY_LOG */ struct kvm_dirty_log { __u32 slot; @@ -292,6 +278,17 @@ struct kvm_s390_interrupt { __u64 parm64; }; +/* for KVM_SET_GUEST_DEBUG */ + +#define KVM_GUESTDBG_ENABLE 0x00000001 +#define KVM_GUESTDBG_SINGLESTEP 0x00000002 + +struct kvm_guest_debug { + __u32 control; + __u32 pad; + struct kvm_guest_debug_arch arch; +}; + #define KVM_TRC_SHIFT 16 /* * kvm trace categories @@ -396,6 +393,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 #endif +#define KVM_CAP_SET_GUEST_DEBUG 23 /* * ioctls for VM fds @@ -440,7 +438,8 @@ struct kvm_trace_rec { #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) +/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ +#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -469,6 +468,26 @@ struct kvm_trace_rec { #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) /* Available with KVM_CAP_NMI */ #define KVM_NMI _IO(KVMIO, 0x9a) +/* Available with KVM_CAP_SET_GUEST_DEBUG */ +#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) + +/* + * Deprecated interfaces + */ +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +struct kvm_debug_guest { + __u32 enabled; + __u32 pad; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; +}; + +#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bf6f703642f..e92212f970d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -73,7 +73,7 @@ struct kvm_vcpu { struct kvm_run *run; int guest_mode; unsigned long requests; - struct kvm_guest_debug guest_debug; + unsigned long guest_debug; int fpu_active; int guest_fpu_loaded; wait_queue_head_t wq; @@ -255,8 +255,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); -int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, - struct kvm_debug_guest *dbg); +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); int kvm_arch_init(void *opaque); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 29a667ce35b..f83ef9c7e89 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1755,13 +1755,13 @@ out_free2: r = 0; break; } - case KVM_DEBUG_GUEST: { - struct kvm_debug_guest dbg; + case KVM_SET_GUEST_DEBUG: { + struct kvm_guest_debug dbg; r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) goto out; - r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg); + r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); if (r) goto out; r = 0; -- cgit v1.2.3-70-g09d2 From 55934c0bd3bb232a9cf902820dd63ad18ed65e49 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: VMX: Allow single-stepping when uninterruptible When single-stepping over STI and MOV SS, we must clear the corresponding interruptibility bits in the guest state. Otherwise vmentry fails as it then expects bit 14 (BS) in pending debug exceptions being set, but that's not correct for the guest debugging case. Note that clearing those bits is safe as we check for interruptibility based on the original state and do not inject interrupts or NMIs if guest interruptibility was blocked. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f55690ddb3a..c776868ffe4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2478,6 +2478,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, { vmx_update_window_states(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); @@ -3244,6 +3249,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) vmx_update_window_states(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); -- cgit v1.2.3-70-g09d2 From 42dbaa5a057736bf8b5c22aa42dbe975bf1080e5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: x86: Virtualize debug registers So far KVM only had basic x86 debug register support, once introduced to realize guest debugging that way. The guest itself was not able to use those registers. This patch now adds (almost) full support for guest self-debugging via hardware registers. It refactors the code, moving generic parts out of SVM (VMX was already cleaned up by the KVM_SET_GUEST_DEBUG patches), and it ensures that the registers are properly switched between host and guest. This patch also prepares debug register usage by the host. The latter will (once wired-up by the following patch) allow for hardware breakpoints/watchpoints in guest code. If this is enabled, the guest will only see faked debug registers without functionality, but with content reflecting the guest's modifications. Tested on Intel only, but SVM /should/ work as well, but who knows... Known limitations: Trapping on tss switch won't work - most probably on Intel. Credits also go to Joerg Roedel - I used his once posted debugging series as platform for this patch. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 22 ++++++++ arch/x86/include/asm/vmx.h | 2 +- arch/x86/kvm/kvm_svm.h | 6 --- arch/x86/kvm/svm.c | 116 +++++++++++++++------------------------- arch/x86/kvm/vmx.c | 114 +++++++++++++++++++++++++++++++++------ arch/x86/kvm/x86.c | 29 ++++++++++ 6 files changed, 193 insertions(+), 96 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c430cd580ee..0a4dab25a91 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -135,6 +135,19 @@ enum { #define KVM_NR_MEM_OBJS 40 +#define KVM_NR_DB_REGS 4 + +#define DR6_BD (1 << 13) +#define DR6_BS (1 << 14) +#define DR6_FIXED_1 0xffff0ff0 +#define DR6_VOLATILE 0x0000e00f + +#define DR7_BP_EN_MASK 0x000000ff +#define DR7_GE (1 << 9) +#define DR7_GD (1 << 13) +#define DR7_FIXED_1 0x00000400 +#define DR7_VOLATILE 0xffff23ff + /* * We don't want allocation failures within the mmu code, so we preallocate * enough memory for a single page fault in a cache. @@ -334,6 +347,15 @@ struct kvm_vcpu_arch { struct mtrr_state_type mtrr_state; u32 pat; + + int switch_db_regs; + unsigned long host_db[KVM_NR_DB_REGS]; + unsigned long host_dr6; + unsigned long host_dr7; + unsigned long db[KVM_NR_DB_REGS]; + unsigned long dr6; + unsigned long dr7; + unsigned long eff_db[KVM_NR_DB_REGS]; }; struct kvm_mem_alias { diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 32159f034ef..498f944010b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -312,7 +312,7 @@ enum vmcs_field { #define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ #define TYPE_MOV_TO_DR (0 << 4) #define TYPE_MOV_FROM_DR (1 << 4) -#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */ +#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */ /* segment AR */ diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 91673413d8f..ed66e4c078d 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = { }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) -#define NUM_DB_REGS 4 struct kvm_vcpu; @@ -29,16 +28,11 @@ struct vcpu_svm { struct svm_cpu_data *svm_data; uint64_t asid_generation; - unsigned long db_regs[NUM_DB_REGS]; - u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; u64 host_gs_base; unsigned long host_cr2; - unsigned long host_db_regs[NUM_DB_REGS]; - unsigned long host_dr6; - unsigned long host_dr7; u32 *msrpm; struct vmcb *hsave; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 88d9062f454..815f50e425a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -38,9 +38,6 @@ MODULE_LICENSE("GPL"); #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 -#define DR7_GD_MASK (1 << 13) -#define DR6_BD_MASK (1 << 13) - #define SEG_TYPE_LDT 2 #define SEG_TYPE_BUSY_TSS16 3 @@ -181,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val) asm volatile ("mov %0, %%cr2" :: "r" (val)); } -static inline unsigned long read_dr6(void) -{ - unsigned long dr6; - - asm volatile ("mov %%dr6, %0" : "=r" (dr6)); - return dr6; -} - -static inline void write_dr6(unsigned long val) -{ - asm volatile ("mov %0, %%dr6" :: "r" (val)); -} - -static inline unsigned long read_dr7(void) -{ - unsigned long dr7; - - asm volatile ("mov %%dr7, %0" : "=r" (dr7)); - return dr7; -} - -static inline void write_dr7(unsigned long val) -{ - asm volatile ("mov %0, %%dr7" :: "r" (val)); -} - static inline void force_new_asid(struct kvm_vcpu *vcpu) { to_svm(vcpu)->asid_generation--; @@ -695,7 +666,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; - memset(svm->db_regs, 0, sizeof(svm->db_regs)); init_vmcb(svm); fx_init(&svm->vcpu); @@ -1035,7 +1005,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { - unsigned long val = to_svm(vcpu)->db_regs[dr]; + struct vcpu_svm *svm = to_svm(vcpu); + unsigned long val; + + switch (dr) { + case 0 ... 3: + val = vcpu->arch.db[dr]; + break; + case 6: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + val = vcpu->arch.dr6; + else + val = svm->vmcb->save.dr6; + break; + case 7: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + val = vcpu->arch.dr7; + else + val = svm->vmcb->save.dr7; + break; + default: + val = 0; + } + KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); return val; } @@ -1045,33 +1037,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, { struct vcpu_svm *svm = to_svm(vcpu); - *exception = 0; + KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler); - if (svm->vmcb->save.dr7 & DR7_GD_MASK) { - svm->vmcb->save.dr7 &= ~DR7_GD_MASK; - svm->vmcb->save.dr6 |= DR6_BD_MASK; - *exception = DB_VECTOR; - return; - } + *exception = 0; switch (dr) { case 0 ... 3: - svm->db_regs[dr] = value; + vcpu->arch.db[dr] = value; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = value; return; case 4 ... 5: - if (vcpu->arch.cr4 & X86_CR4_DE) { + if (vcpu->arch.cr4 & X86_CR4_DE) *exception = UD_VECTOR; + return; + case 6: + if (value & 0xffffffff00000000ULL) { + *exception = GP_VECTOR; return; } - case 7: { - if (value & ~((1ULL << 32) - 1)) { + vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; + return; + case 7: + if (value & 0xffffffff00000000ULL) { *exception = GP_VECTOR; return; } - svm->vmcb->save.dr7 = value; + vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + svm->vmcb->save.dr7 = vcpu->arch.dr7; + vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); + } return; - } default: + /* FIXME: Possible case? */ printk(KERN_DEBUG "%s: unexpected dr %u\n", __func__, dr); *exception = UD_VECTOR; @@ -2365,22 +2364,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void save_db_regs(unsigned long *db_regs) -{ - asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); - asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1])); - asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2])); - asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3])); -} - -static void load_db_regs(unsigned long *db_regs) -{ - asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0])); - asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1])); - asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2])); - asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3])); -} - static void svm_flush_tlb(struct kvm_vcpu *vcpu) { force_new_asid(vcpu); @@ -2439,20 +2422,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) gs_selector = kvm_read_gs(); ldt_selector = kvm_read_ldt(); svm->host_cr2 = kvm_read_cr2(); - svm->host_dr6 = read_dr6(); - svm->host_dr7 = read_dr7(); if (!is_nested(svm)) svm->vmcb->save.cr2 = vcpu->arch.cr2; /* required for live migration with NPT */ if (npt_enabled) svm->vmcb->save.cr3 = vcpu->arch.cr3; - if (svm->vmcb->save.dr7 & 0xff) { - write_dr7(0); - save_db_regs(svm->host_db_regs); - load_db_regs(svm->db_regs); - } - clgi(); local_irq_enable(); @@ -2528,16 +2503,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) #endif ); - if ((svm->vmcb->save.dr7 & 0xff)) - load_db_regs(svm->host_db_regs); - vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; - write_dr6(svm->host_dr6); - write_dr7(svm->host_dr7); kvm_write_cr2(svm->host_cr2); kvm_load_fs(fs_selector); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c776868ffe4..0989776ee7b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2311,7 +2311,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, 0); kvm_register_write(vcpu, VCPU_REGS_RSP, 0); - /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ vmcs_writel(GUEST_DR7, 0x400); vmcs_writel(GUEST_GDTR_BASE, 0); @@ -2577,7 +2576,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info, ex_no, error_code; - unsigned long cr2, rip; + unsigned long cr2, rip, dr6; u32 vect_info; enum emulation_result er; @@ -2637,14 +2636,28 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } ex_no = intr_info & INTR_INFO_VECTOR_MASK; - if (ex_no == DB_VECTOR || ex_no == BP_VECTOR) { + switch (ex_no) { + case DB_VECTOR: + dr6 = vmcs_readl(EXIT_QUALIFICATION); + if (!(vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + vcpu->arch.dr6 = dr6 | DR6_FIXED_1; + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); + /* fall through */ + case BP_VECTOR: kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; - } else { + break; + default: kvm_run->exit_reason = KVM_EXIT_EXCEPTION; kvm_run->ex.exception = ex_no; kvm_run->ex.error_code = error_code; + break; } return 0; } @@ -2784,21 +2797,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) unsigned long val; int dr, reg; - /* - * FIXME: this code assumes the host is debugging the guest. - * need to deal with guest debugging itself too. - */ + dr = vmcs_readl(GUEST_DR7); + if (dr & DR7_GD) { + /* + * As the vm-exit takes precedence over the debug trap, we + * need to emulate the latter, either for the host or the + * guest debugging itself. + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { + kvm_run->debug.arch.dr6 = vcpu->arch.dr6; + kvm_run->debug.arch.dr7 = dr; + kvm_run->debug.arch.pc = + vmcs_readl(GUEST_CS_BASE) + + vmcs_readl(GUEST_RIP); + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } else { + vcpu->arch.dr7 &= ~DR7_GD; + vcpu->arch.dr6 |= DR6_BD; + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + } + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & 7; - reg = (exit_qualification >> 8) & 15; - if (exit_qualification & 16) { - /* mov from dr */ + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + reg = DEBUG_REG_ACCESS_REG(exit_qualification); + if (exit_qualification & TYPE_MOV_FROM_DR) { switch (dr) { + case 0 ... 3: + val = vcpu->arch.db[dr]; + break; case 6: - val = 0xffff0ff0; + val = vcpu->arch.dr6; break; case 7: - val = 0x400; + val = vcpu->arch.dr7; break; default: val = 0; @@ -2806,7 +2842,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_register_write(vcpu, reg, val); KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); } else { - /* mov to dr */ + val = vcpu->arch.regs[reg]; + switch (dr) { + case 0 ... 3: + vcpu->arch.db[dr] = val; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = val; + break; + case 4 ... 5: + if (vcpu->arch.cr4 & X86_CR4_DE) + kvm_queue_exception(vcpu, UD_VECTOR); + break; + case 6: + if (val & 0xffffffff00000000ULL) { + kvm_queue_exception(vcpu, GP_VECTOR); + break; + } + vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + break; + case 7: + if (val & 0xffffffff00000000ULL) { + kvm_queue_exception(vcpu, GP_VECTOR); + break; + } + vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + vcpu->arch.switch_db_regs = + (val & DR7_BP_EN_MASK); + } + break; + } + KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler); } skip_emulated_instruction(vcpu); return 1; @@ -2957,7 +3024,18 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } tss_selector = exit_qualification; - return kvm_task_switch(vcpu, tss_selector, reason); + if (!kvm_task_switch(vcpu, tss_selector, reason)) + return 0; + + /* clear all local breakpoint enable flags */ + vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); + + /* + * TODO: What about debug traps on tss switch? + * Are we supposed to inject them and update dr6? + */ + + return 1; } static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -3342,6 +3420,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) */ vmcs_writel(HOST_CR0, read_cr0()); + set_debugreg(vcpu->arch.dr6, 6); + asm( /* Store host registers */ "push %%"R"dx; push %%"R"bp;" @@ -3436,6 +3516,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); vcpu->arch.regs_dirty = 0; + get_debugreg(vcpu->arch.dr6, 6); + vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e990d164b56..300bc4d42ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3025,10 +3025,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_guest_enter(); + get_debugreg(vcpu->arch.host_dr6, 6); + get_debugreg(vcpu->arch.host_dr7, 7); + if (unlikely(vcpu->arch.switch_db_regs)) { + get_debugreg(vcpu->arch.host_db[0], 0); + get_debugreg(vcpu->arch.host_db[1], 1); + get_debugreg(vcpu->arch.host_db[2], 2); + get_debugreg(vcpu->arch.host_db[3], 3); + + set_debugreg(0, 7); + set_debugreg(vcpu->arch.eff_db[0], 0); + set_debugreg(vcpu->arch.eff_db[1], 1); + set_debugreg(vcpu->arch.eff_db[2], 2); + set_debugreg(vcpu->arch.eff_db[3], 3); + } KVMTRACE_0D(VMENTRY, vcpu, entryexit); kvm_x86_ops->run(vcpu, kvm_run); + if (unlikely(vcpu->arch.switch_db_regs)) { + set_debugreg(0, 7); + set_debugreg(vcpu->arch.host_db[0], 0); + set_debugreg(vcpu->arch.host_db[1], 1); + set_debugreg(vcpu->arch.host_db[2], 2); + set_debugreg(vcpu->arch.host_db[3], 3); + } + set_debugreg(vcpu->arch.host_dr6, 6); + set_debugreg(vcpu->arch.host_dr7, 7); + vcpu->guest_mode = 0; local_irq_enable(); @@ -4035,6 +4059,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = false; + vcpu->arch.switch_db_regs = 0; + memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); + vcpu->arch.dr6 = DR6_FIXED_1; + vcpu->arch.dr7 = DR7_FIXED_1; + return kvm_x86_ops->vcpu_reset(vcpu); } -- cgit v1.2.3-70-g09d2 From ae675ef01cd86014acf8da5dee87876b71122495 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 15 Dec 2008 13:52:10 +0100 Subject: KVM: x86: Wire-up hardware breakpoints for guest debugging Add the remaining bits to make use of debug registers also for guest debugging, thus enabling the use of hardware breakpoints and watchpoints. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 5 +++++ arch/x86/kvm/vmx.c | 5 +++++ arch/x86/kvm/x86.c | 14 +++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 815f50e425a..b3a7a314d55 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -958,6 +958,11 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) } else vcpu->guest_debug = 0; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; + else + svm->vmcb->save.dr7 = vcpu->arch.dr7; + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; else if (old_debug & KVM_GUESTDBG_SINGLESTEP) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0989776ee7b..cee81c9a665 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1017,6 +1017,11 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) vcpu->guest_debug = 0; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); + else + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + flags = vmcs_readl(GUEST_RFLAGS); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 300bc4d42ab..2477e87b2f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3861,10 +3861,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - int r; + int i, r; vcpu_load(vcpu); + if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { + for (i = 0; i < KVM_NR_DB_REGS; ++i) + vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; + vcpu->arch.switch_db_regs = + (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); + } else { + for (i = 0; i < KVM_NR_DB_REGS; i++) + vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); + } + r = kvm_x86_ops->set_guest_debug(vcpu, dbg); if (dbg->control & KVM_GUESTDBG_INJECT_DB) -- cgit v1.2.3-70-g09d2 From a770f6f28b1a9287189f3dc8333eb694d9a2f0ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 21 Dec 2008 19:20:09 +0200 Subject: KVM: MMU: Inherit a shadow page's guest level count from vcpu setup Instead of "calculating" it on every shadow page allocation, set it once when switching modes, and copy it when allocating pages. This doesn't buy us much, but sets up the stage for inheriting more information related to the mmu setup. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0a4dab25a91..28f875f28f5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -244,6 +244,7 @@ struct kvm_mmu { hpa_t root_hpa; int root_level; int shadow_root_level; + union kvm_mmu_page_role base_role; u64 *pae_root; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2d4477c7147..f15023c11fe 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1204,8 +1204,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp; struct hlist_node *node, *tmp; - role.word = 0; - role.glevels = vcpu->arch.mmu.root_level; + role = vcpu->arch.mmu.base_role; role.level = level; role.metaphysical = metaphysical; role.access = access; @@ -2251,17 +2250,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) static int init_kvm_softmmu(struct kvm_vcpu *vcpu) { + int r; + ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); if (!is_paging(vcpu)) - return nonpaging_init_context(vcpu); + r = nonpaging_init_context(vcpu); else if (is_long_mode(vcpu)) - return paging64_init_context(vcpu); + r = paging64_init_context(vcpu); else if (is_pae(vcpu)) - return paging32E_init_context(vcpu); + r = paging32E_init_context(vcpu); else - return paging32_init_context(vcpu); + r = paging32_init_context(vcpu); + + vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; + + return r; } static int init_kvm_mmu(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 2f0b3d60b2c43aef7cd10169c425c052169c622a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 21 Dec 2008 19:27:36 +0200 Subject: KVM: MMU: Segregate mmu pages created with different cr4.pge settings Don't allow a vcpu with cr4.pge cleared to use a shadow page created with cr4.pge set; this might cause a cr3 switch not to sync ptes that have the global bit set (the global bit has no effect if !cr4.pge). This can only occur on smp with different cr4.pge settings for different vcpus (since a cr4 change will resync the shadow ptes), but there's no cost to being correct here. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 28f875f28f5..c2a01d0513f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -183,6 +183,7 @@ union kvm_mmu_page_role { unsigned metaphysical:1; unsigned access:3; unsigned invalid:1; + unsigned cr4_pge:1; }; }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2477e87b2f8..873602b5edf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -364,6 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; + vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE); kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } -- cgit v1.2.3-70-g09d2 From e2078318042569682d0496cfd7bd962a766e82b1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 21 Dec 2008 19:36:59 +0200 Subject: KVM: MMU: Initialize a shadow page's global attribute from cr4.pge If cr4.pge is cleared, we ought to treat any ptes in the page as non-global. This allows us to remove the check from set_spte(). Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f15023c11fe..5f03ec324e3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -797,7 +797,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, ASSERT(is_empty_shadow_page(sp->spt)); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; - sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -1241,6 +1240,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); sp->gfn = gfn; sp->role = role; + sp->global = role.cr4_pge; hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) { if (rmap_write_protect(vcpu->kvm, gfn)) @@ -1668,8 +1668,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, u64 mt_mask = shadow_mt_mask; struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); - if (!(vcpu->arch.cr4 & X86_CR4_PGE)) - global = 0; if (!global && sp->global) { sp->global = 0; if (sp->unsync) { -- cgit v1.2.3-70-g09d2 From 2b3d2a206037b1471de6a6dc51427af034cfdb47 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 23 Dec 2008 19:46:01 +0200 Subject: KVM: Fix vmload and friends misinterpreted as lidt The AMD SVM instruction family all overload the 0f 01 /3 opcode, further multiplexing on the three r/m bits. But the code decided that anything that isn't a vmmcall must be an lidt (which shares the 0f 01 /3 opcode, for the case that mod = 3). Fix by aborting emulation if this isn't a vmmcall. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index d174db7a337..54fb09889a8 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1908,11 +1908,16 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 3: /* lidt/vmmcall */ - if (c->modrm_mod == 3 && c->modrm_rm == 1) { - rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc) - goto done; - kvm_emulate_hypercall(ctxt->vcpu); + if (c->modrm_mod == 3) { + switch (c->modrm_rm) { + case 1: + rc = kvm_fix_hypercall(ctxt->vcpu); + if (rc) + goto done; + break; + default: + goto cannot_emulate; + } } else { rc = read_descriptor(ctxt, ops, c->src.ptr, &size, &address, -- cgit v1.2.3-70-g09d2 From 2d11123a77e54d5cea262c958e8498f4a08bce3d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 25 Dec 2008 14:39:47 +0200 Subject: KVM: MMU: Add for_each_shadow_entry(), a simpler alternative to walk_shadow() Using a for_each loop style removes the need to write callback and nasty casts. Implement the walk_shadow() using the for_each_shadow_entry(). Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5f03ec324e3..c669f2af1d1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -150,6 +150,20 @@ struct kvm_shadow_walk { u64 addr, u64 *spte, int level); }; +struct kvm_shadow_walk_iterator { + u64 addr; + hpa_t shadow_addr; + int level; + u64 *sptep; + unsigned index; +}; + +#define for_each_shadow_entry(_vcpu, _addr, _walker) \ + for (shadow_walk_init(&(_walker), _vcpu, _addr); \ + shadow_walk_okay(&(_walker)); \ + shadow_walk_next(&(_walker))) + + struct kvm_unsync_walk { int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); }; @@ -1254,33 +1268,48 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, return sp; } +static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, + struct kvm_vcpu *vcpu, u64 addr) +{ + iterator->addr = addr; + iterator->shadow_addr = vcpu->arch.mmu.root_hpa; + iterator->level = vcpu->arch.mmu.shadow_root_level; + if (iterator->level == PT32E_ROOT_LEVEL) { + iterator->shadow_addr + = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; + iterator->shadow_addr &= PT64_BASE_ADDR_MASK; + --iterator->level; + if (!iterator->shadow_addr) + iterator->level = 0; + } +} + +static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) +{ + if (iterator->level < PT_PAGE_TABLE_LEVEL) + return false; + iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); + iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; + return true; +} + +static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) +{ + iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK; + --iterator->level; +} + static int walk_shadow(struct kvm_shadow_walk *walker, struct kvm_vcpu *vcpu, u64 addr) { - hpa_t shadow_addr; - int level; + struct kvm_shadow_walk_iterator iterator; int r; - u64 *sptep; - unsigned index; - - shadow_addr = vcpu->arch.mmu.root_hpa; - level = vcpu->arch.mmu.shadow_root_level; - if (level == PT32E_ROOT_LEVEL) { - shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; - shadow_addr &= PT64_BASE_ADDR_MASK; - if (!shadow_addr) - return 1; - --level; - } - while (level >= PT_PAGE_TABLE_LEVEL) { - index = SHADOW_PT_INDEX(addr, level); - sptep = ((u64 *)__va(shadow_addr)) + index; - r = walker->entry(walker, vcpu, addr, sptep, level); + for_each_shadow_entry(vcpu, addr, iterator) { + r = walker->entry(walker, vcpu, addr, + iterator.sptep, iterator.level); if (r) return r; - shadow_addr = *sptep & PT64_BASE_ADDR_MASK; - --level; } return 0; } -- cgit v1.2.3-70-g09d2 From 9f652d21c3f887075a33abae85bf53fec64e67b1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 25 Dec 2008 14:54:25 +0200 Subject: KVM: MMU: Use for_each_shadow_entry() in __direct_map() Eliminating a callback and a useless structure. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 83 +++++++++++++++++++----------------------------------- 1 file changed, 29 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c669f2af1d1..a25e1adb5cf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1846,67 +1846,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } -struct direct_shadow_walk { - struct kvm_shadow_walk walker; - pfn_t pfn; - int write; - int largepage; - int pt_write; -}; - -static int direct_map_entry(struct kvm_shadow_walk *_walk, - struct kvm_vcpu *vcpu, - u64 addr, u64 *sptep, int level) +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, + int largepage, gfn_t gfn, pfn_t pfn) { - struct direct_shadow_walk *walk = - container_of(_walk, struct direct_shadow_walk, walker); + struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; + int pt_write = 0; gfn_t pseudo_gfn; - gfn_t gfn = addr >> PAGE_SHIFT; - - if (level == PT_PAGE_TABLE_LEVEL - || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { - mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, - 0, walk->write, 1, &walk->pt_write, - walk->largepage, 0, gfn, walk->pfn, false); - ++vcpu->stat.pf_fixed; - return 1; - } - if (*sptep == shadow_trap_nonpresent_pte) { - pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; - sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, - 1, ACC_ALL, sptep); - if (!sp) { - pgprintk("nonpaging_map: ENOMEM\n"); - kvm_release_pfn_clean(walk->pfn); - return -ENOMEM; + for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { + if (iterator.level == PT_PAGE_TABLE_LEVEL + || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { + mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, + 0, write, 1, &pt_write, + largepage, 0, gfn, pfn, false); + ++vcpu->stat.pf_fixed; + break; } - set_shadow_pte(sptep, - __pa(sp->spt) - | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask); - } - return 0; -} + if (*iterator.sptep == shadow_trap_nonpresent_pte) { + pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; + sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, + iterator.level - 1, + 1, ACC_ALL, iterator.sptep); + if (!sp) { + pgprintk("nonpaging_map: ENOMEM\n"); + kvm_release_pfn_clean(pfn); + return -ENOMEM; + } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int largepage, gfn_t gfn, pfn_t pfn) -{ - int r; - struct direct_shadow_walk walker = { - .walker = { .entry = direct_map_entry, }, - .pfn = pfn, - .largepage = largepage, - .write = write, - .pt_write = 0, - }; - - r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT); - if (r < 0) - return r; - return walker.pt_write; + set_shadow_pte(iterator.sptep, + __pa(sp->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask); + } + } + return pt_write; } static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) -- cgit v1.2.3-70-g09d2 From e7a04c99b54ad9acb98a56113ec3163bc1039e13 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 25 Dec 2008 15:10:50 +0200 Subject: KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in fetch() Effectively reverting to the pre walk_shadow() version -- but now with the reusable for_each(). Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 128 ++++++++++++++++++++------------------------- 1 file changed, 58 insertions(+), 70 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 9fd78b6e17a..69c7e3311b8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -283,91 +283,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, /* * Fetch a shadow pte for a specific level in the paging hierarchy. */ -static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, - struct kvm_vcpu *vcpu, u64 addr, - u64 *sptep, int level) +static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + struct guest_walker *gw, + int user_fault, int write_fault, int largepage, + int *ptwrite, pfn_t pfn) { - struct shadow_walker *sw = - container_of(_sw, struct shadow_walker, walker); - struct guest_walker *gw = sw->guest_walker; unsigned access = gw->pt_access; struct kvm_mmu_page *shadow_page; - u64 spte; + u64 spte, *sptep; int metaphysical; gfn_t table_gfn; int r; + int level; pt_element_t curr_pte; + struct kvm_shadow_walk_iterator iterator; - if (level == PT_PAGE_TABLE_LEVEL - || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { - mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, - sw->user_fault, sw->write_fault, - gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, - gw->ptes[gw->level-1] & PT_GLOBAL_MASK, - gw->gfn, sw->pfn, false); - sw->sptep = sptep; - return 1; - } - - if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) - return 0; - - if (is_large_pte(*sptep)) { - set_shadow_pte(sptep, shadow_trap_nonpresent_pte); - kvm_flush_remote_tlbs(vcpu->kvm); - rmap_remove(vcpu->kvm, sptep); - } + if (!is_present_pte(gw->ptes[gw->level - 1])) + return NULL; - if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { - metaphysical = 1; - if (!is_dirty_pte(gw->ptes[level - 1])) - access &= ~ACC_WRITE_MASK; - table_gfn = gpte_to_gfn(gw->ptes[level - 1]); - } else { - metaphysical = 0; - table_gfn = gw->table_gfn[level - 2]; - } - shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1, - metaphysical, access, sptep); - if (!metaphysical) { - r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], - &curr_pte, sizeof(curr_pte)); - if (r || curr_pte != gw->ptes[level - 2]) { - kvm_mmu_put_page(shadow_page, sptep); - kvm_release_pfn_clean(sw->pfn); - sw->sptep = NULL; - return 1; + for_each_shadow_entry(vcpu, addr, iterator) { + level = iterator.level; + sptep = iterator.sptep; + if (level == PT_PAGE_TABLE_LEVEL + || (largepage && level == PT_DIRECTORY_LEVEL)) { + mmu_set_spte(vcpu, sptep, access, + gw->pte_access & access, + user_fault, write_fault, + gw->ptes[gw->level-1] & PT_DIRTY_MASK, + ptwrite, largepage, + gw->ptes[gw->level-1] & PT_GLOBAL_MASK, + gw->gfn, pfn, false); + break; } - } - spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; - *sptep = spte; - return 0; -} + if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) + continue; -static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, - struct guest_walker *guest_walker, - int user_fault, int write_fault, int largepage, - int *ptwrite, pfn_t pfn) -{ - struct shadow_walker walker = { - .walker = { .entry = FNAME(shadow_walk_entry), }, - .guest_walker = guest_walker, - .user_fault = user_fault, - .write_fault = write_fault, - .largepage = largepage, - .ptwrite = ptwrite, - .pfn = pfn, - }; + if (is_large_pte(*sptep)) { + set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); + rmap_remove(vcpu->kvm, sptep); + } - if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) - return NULL; + if (level == PT_DIRECTORY_LEVEL + && gw->level == PT_DIRECTORY_LEVEL) { + metaphysical = 1; + if (!is_dirty_pte(gw->ptes[level - 1])) + access &= ~ACC_WRITE_MASK; + table_gfn = gpte_to_gfn(gw->ptes[level - 1]); + } else { + metaphysical = 0; + table_gfn = gw->table_gfn[level - 2]; + } + shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, + metaphysical, access, sptep); + if (!metaphysical) { + r = kvm_read_guest_atomic(vcpu->kvm, + gw->pte_gpa[level - 2], + &curr_pte, sizeof(curr_pte)); + if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); + kvm_release_pfn_clean(pfn); + sptep = NULL; + break; + } + } - walk_shadow(&walker.walker, vcpu, addr); + spte = __pa(shadow_page->spt) + | PT_PRESENT_MASK | PT_ACCESSED_MASK + | PT_WRITABLE_MASK | PT_USER_MASK; + *sptep = spte; + } - return walker.sptep; + return sptep; } /* -- cgit v1.2.3-70-g09d2 From a461930bc3cece021f8f89a80dcc1d0691a92b52 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 25 Dec 2008 15:19:00 +0200 Subject: KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in invlpg() Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 81 ++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 69c7e3311b8..46b68f941f6 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -25,7 +25,6 @@ #if PTTYPE == 64 #define pt_element_t u64 #define guest_walker guest_walker64 - #define shadow_walker shadow_walker64 #define FNAME(name) paging##64_##name #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK @@ -42,7 +41,6 @@ #elif PTTYPE == 32 #define pt_element_t u32 #define guest_walker guest_walker32 - #define shadow_walker shadow_walker32 #define FNAME(name) paging##32_##name #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK @@ -73,18 +71,6 @@ struct guest_walker { u32 error_code; }; -struct shadow_walker { - struct kvm_shadow_walk walker; - struct guest_walker *guest_walker; - int user_fault; - int write_fault; - int largepage; - int *ptwrite; - pfn_t pfn; - u64 *sptep; - gpa_t pte_gpa; -}; - static gfn_t gpte_to_gfn(pt_element_t gpte) { return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -453,54 +439,52 @@ out_unlock: return 0; } -static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, - struct kvm_vcpu *vcpu, u64 addr, - u64 *sptep, int level) +static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { - struct shadow_walker *sw = - container_of(_sw, struct shadow_walker, walker); + struct kvm_shadow_walk_iterator iterator; + pt_element_t gpte; + gpa_t pte_gpa = -1; + int level; + u64 *sptep; + + spin_lock(&vcpu->kvm->mmu_lock); - /* FIXME: properly handle invlpg on large guest pages */ - if (level == PT_PAGE_TABLE_LEVEL || - ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); + for_each_shadow_entry(vcpu, gva, iterator) { + level = iterator.level; + sptep = iterator.sptep; - sw->pte_gpa = (sp->gfn << PAGE_SHIFT); - sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + /* FIXME: properly handle invlpg on large guest pages */ + if (level == PT_PAGE_TABLE_LEVEL || + ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); - if (is_shadow_present_pte(*sptep)) { - rmap_remove(vcpu->kvm, sptep); - if (is_large_pte(*sptep)) - --vcpu->kvm->stat.lpages; + pte_gpa = (sp->gfn << PAGE_SHIFT); + pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + + if (is_shadow_present_pte(*sptep)) { + rmap_remove(vcpu->kvm, sptep); + if (is_large_pte(*sptep)) + --vcpu->kvm->stat.lpages; + } + set_shadow_pte(sptep, shadow_trap_nonpresent_pte); + break; } - set_shadow_pte(sptep, shadow_trap_nonpresent_pte); - return 1; - } - if (!is_shadow_present_pte(*sptep)) - return 1; - return 0; -} -static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) -{ - pt_element_t gpte; - struct shadow_walker walker = { - .walker = { .entry = FNAME(shadow_invlpg_entry), }, - .pte_gpa = -1, - }; + if (!is_shadow_present_pte(*sptep)) + break; + } - spin_lock(&vcpu->kvm->mmu_lock); - walk_shadow(&walker.walker, vcpu, gva); spin_unlock(&vcpu->kvm->mmu_lock); - if (walker.pte_gpa == -1) + + if (pte_gpa == -1) return; - if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, + if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, sizeof(pt_element_t))) return; if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { if (mmu_topup_memory_caches(vcpu)) return; - kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, + kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, sizeof(pt_element_t), 0); } } @@ -607,7 +591,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) #undef pt_element_t #undef guest_walker -#undef shadow_walker #undef FNAME #undef PT_BASE_ADDR_MASK #undef PT_INDEX -- cgit v1.2.3-70-g09d2 From e8c4a4e8a7cc047dfb3c26b2cbc8599ad3460364 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 25 Dec 2008 15:20:07 +0200 Subject: KVM: MMU: Drop walk_shadow() No longer used. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a25e1adb5cf..15850809b55 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -145,11 +145,6 @@ struct kvm_rmap_desc { struct kvm_rmap_desc *more; }; -struct kvm_shadow_walk { - int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, - u64 addr, u64 *spte, int level); -}; - struct kvm_shadow_walk_iterator { u64 addr; hpa_t shadow_addr; @@ -1299,21 +1294,6 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) --iterator->level; } -static int walk_shadow(struct kvm_shadow_walk *walker, - struct kvm_vcpu *vcpu, u64 addr) -{ - struct kvm_shadow_walk_iterator iterator; - int r; - - for_each_shadow_entry(vcpu, addr, iterator) { - r = walker->entry(walker, vcpu, addr, - iterator.sptep, iterator.level); - if (r) - return r; - } - return 0; -} - static void kvm_mmu_page_unlink_children(struct kvm *kvm, struct kvm_mmu_page *sp) { -- cgit v1.2.3-70-g09d2 From 53f658b3c33616a4997ee254311b335e59063289 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 11 Dec 2008 20:45:05 +0100 Subject: KVM: VMX: initialize TSC offset relative to vm creation time VMX initializes the TSC offset for each vcpu at different times, and also reinitializes it for vcpus other than 0 on APIC SIPI message. This bug causes the TSC's to appear unsynchronized in the guest, even if the host is good. Older Linux kernels don't handle the situation very well, so gettimeofday is likely to go backwards in time: http://www.mail-archive.com/kvm@vger.kernel.org/msg02955.html http://sourceforge.net/tracker/index.php?func=detail&aid=2025534&group_id=180599&atid=893831 Fix it by initializating the offset of each vcpu relative to vm creation time, and moving it from vmx_vcpu_reset to vmx_vcpu_setup, out of the APIC MP init path. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 19 +++++++++++-------- arch/x86/kvm/x86.c | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c2a01d0513f..9efc446b5ac 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -398,6 +398,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; + u64 vm_init_tsc; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cee81c9a665..3312047664a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -865,11 +865,8 @@ static u64 guest_read_tsc(void) * writes 'guest_tsc' into guest's timestamp counter "register" * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc */ -static void guest_write_tsc(u64 guest_tsc) +static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) { - u64 host_tsc; - - rdtscll(host_tsc); vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); } @@ -934,6 +931,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr; + u64 host_tsc; int ret = 0; switch (msr_index) { @@ -959,7 +957,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_TIME_STAMP_COUNTER: - guest_write_tsc(data); + rdtscll(host_tsc); + guest_write_tsc(data, host_tsc); break; case MSR_P6_PERFCTR0: case MSR_P6_PERFCTR1: @@ -2109,7 +2108,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { u32 host_sysenter_cs, msr_low, msr_high; u32 junk; - u64 host_pat; + u64 host_pat, tsc_this, tsc_base; unsigned long a; struct descriptor_table dt; int i; @@ -2237,6 +2236,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); + tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; + rdtscll(tsc_this); + if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc) + tsc_base = tsc_this; + + guest_write_tsc(0, tsc_base); return 0; } @@ -2328,8 +2333,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); - guest_write_tsc(0); - /* Special registers */ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 873602b5edf..3b2acfd72d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4170,6 +4170,8 @@ struct kvm *kvm_arch_create_vm(void) /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + rdtscll(kvm->arch.vm_init_tsc); + return kvm; } -- cgit v1.2.3-70-g09d2 From 77c2002e7c6f019f59a6f3cc5f8b16b41748dbe1 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 29 Dec 2008 01:42:19 +0200 Subject: KVM: introduce kvm_read_guest_virt, kvm_write_guest_virt This commit change the name of emulator_read_std into kvm_read_guest_virt, and add new function name kvm_write_guest_virt that allow writing into a guest virtual address. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 --- arch/x86/kvm/x86.c | 56 ++++++++++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9efc446b5ac..b74576aec19 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -609,10 +609,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); void fx_init(struct kvm_vcpu *vcpu); -int emulator_read_std(unsigned long addr, - void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu); int emulator_write_emulated(unsigned long addr, const void *val, unsigned int bytes, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b2acfd72d7..67f91764e99 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1976,10 +1976,8 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return dev; } -int emulator_read_std(unsigned long addr, - void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu) +int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; @@ -1987,27 +1985,57 @@ int emulator_read_std(unsigned long addr, while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); - unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); + unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; if (gpa == UNMAPPED_GVA) { r = X86EMUL_PROPAGATE_FAULT; goto out; } - ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); + ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); if (ret < 0) { r = X86EMUL_UNHANDLEABLE; goto out; } - bytes -= tocopy; - data += tocopy; - addr += tocopy; + bytes -= toread; + data += toread; + addr += toread; } out: return r; } -EXPORT_SYMBOL_GPL(emulator_read_std); + +int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) +{ + void *data = val; + int r = X86EMUL_CONTINUE; + + while (bytes) { + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); + unsigned offset = addr & (PAGE_SIZE-1); + unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); + int ret; + + if (gpa == UNMAPPED_GVA) { + r = X86EMUL_PROPAGATE_FAULT; + goto out; + } + ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); + if (ret < 0) { + r = X86EMUL_UNHANDLEABLE; + goto out; + } + + bytes -= towrite; + data += towrite; + addr += towrite; + } +out: + return r; +} + static int emulator_read_emulated(unsigned long addr, void *val, @@ -2029,8 +2057,8 @@ static int emulator_read_emulated(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (emulator_read_std(addr, val, bytes, vcpu) - == X86EMUL_CONTINUE) + if (kvm_read_guest_virt(addr, val, bytes, vcpu) + == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; @@ -2233,7 +2261,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); - emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); + kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); @@ -2241,7 +2269,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); static struct x86_emulate_ops emulate_ops = { - .read_std = emulator_read_std, + .read_std = kvm_read_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, -- cgit v1.2.3-70-g09d2 From 0f346074403bc109f9569f14b45cb09e83729032 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 29 Dec 2008 01:42:20 +0200 Subject: KVM: remove the vmap usage vmap() on guest pages hides those pages from the Linux mm for an extended (userspace determined) amount of time. Get rid of it. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 62 ++++++++++------------------------------------- include/linux/kvm_types.h | 3 +-- 2 files changed, 14 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 67f91764e99..2a4f3a69734 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2371,40 +2371,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); -static void free_pio_guest_pages(struct kvm_vcpu *vcpu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i) - if (vcpu->arch.pio.guest_pages[i]) { - kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]); - vcpu->arch.pio.guest_pages[i] = NULL; - } -} - static int pio_copy_data(struct kvm_vcpu *vcpu) { void *p = vcpu->arch.pio_data; - void *q; + gva_t q = vcpu->arch.pio.guest_gva; unsigned bytes; - int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1; + int ret; - q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE, - PAGE_KERNEL); - if (!q) { - free_pio_guest_pages(vcpu); - return -ENOMEM; - } - q += vcpu->arch.pio.guest_page_offset; bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; if (vcpu->arch.pio.in) - memcpy(q, p, bytes); + ret = kvm_write_guest_virt(q, p, bytes, vcpu); else - memcpy(p, q, bytes); - q -= vcpu->arch.pio.guest_page_offset; - vunmap(q); - free_pio_guest_pages(vcpu); - return 0; + ret = kvm_read_guest_virt(q, p, bytes, vcpu); + return ret; } int complete_pio(struct kvm_vcpu *vcpu) @@ -2515,7 +2494,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.in = in; vcpu->arch.pio.string = 0; vcpu->arch.pio.down = 0; - vcpu->arch.pio.guest_page_offset = 0; vcpu->arch.pio.rep = 0; if (vcpu->run->io.direction == KVM_EXIT_IO_IN) @@ -2543,9 +2521,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, gva_t address, int rep, unsigned port) { unsigned now, in_page; - int i, ret = 0; - int nr_pages = 1; - struct page *page; + int ret = 0; struct kvm_io_device *pio_dev; vcpu->run->exit_reason = KVM_EXIT_IO; @@ -2557,7 +2533,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->arch.pio.in = in; vcpu->arch.pio.string = 1; vcpu->arch.pio.down = down; - vcpu->arch.pio.guest_page_offset = offset_in_page(address); vcpu->arch.pio.rep = rep; if (vcpu->run->io.direction == KVM_EXIT_IO_IN) @@ -2577,15 +2552,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, else in_page = offset_in_page(address) + size; now = min(count, (unsigned long)in_page / size); - if (!now) { - /* - * String I/O straddles page boundary. Pin two guest pages - * so that we satisfy atomicity constraints. Do just one - * transaction to avoid complexity. - */ - nr_pages = 2; + if (!now) now = 1; - } if (down) { /* * String I/O in reverse. Yuck. Kill the guest, fix later. @@ -2600,15 +2568,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) kvm_x86_ops->skip_emulated_instruction(vcpu); - for (i = 0; i < nr_pages; ++i) { - page = gva_to_page(vcpu, address + i * PAGE_SIZE); - vcpu->arch.pio.guest_pages[i] = page; - if (!page) { - kvm_inject_gp(vcpu, 0); - free_pio_guest_pages(vcpu); - return 1; - } - } + vcpu->arch.pio.guest_gva = address; pio_dev = vcpu_find_pio_dev(vcpu, port, vcpu->arch.pio.cur_count, @@ -2616,7 +2576,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); - if (ret >= 0 && pio_dev) { + if (ret == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if (ret == 0 && pio_dev) { pio_string_write(pio_dev, vcpu); complete_pio(vcpu); if (vcpu->arch.pio.count == 0) diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 9b6f395c962..5f4a18cae26 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -43,8 +43,7 @@ typedef hfn_t pfn_t; struct kvm_pio_request { unsigned long count; int cur_count; - struct page *guest_pages[2]; - unsigned guest_page_offset; + gva_t guest_gva; int in; int port; int size; -- cgit v1.2.3-70-g09d2 From 61a6bd672bda3b9468bf5895c1be085c4e481138 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 29 Dec 2008 17:32:28 +0200 Subject: KVM: Fallback support for MSR_VM_HSAVE_PA Since we advertise MSR_VM_HSAVE_PA, userspace will attempt to read it even on Intel. Implement fake support for this MSR to avoid the warnings. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2a4f3a69734..c3fbe8c55c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -742,6 +742,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: + case MSR_VM_HSAVE_PA: break; case 0x200 ... 0x2ff: return set_msr_mtrr(vcpu, msr, data); @@ -863,6 +864,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: + case MSR_VM_HSAVE_PA: data = 0; break; case MSR_MTRRcap: -- cgit v1.2.3-70-g09d2 From 52d939a0bf44081bc9f69b4fbdc9e7f416df27c7 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 30 Dec 2008 15:55:06 -0200 Subject: KVM: PIT: provide an option to disable interrupt reinjection Certain clocks (such as TSC) in older 2.6 guests overaccount for lost ticks, causing severe time drift. Interrupt reinjection magnifies the problem. Provide an option to disable it. [avi: allow room for expansion in case we want to disable reinjection of other timers] Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 5 +++++ arch/x86/kvm/i8254.c | 4 ++++ arch/x86/kvm/i8254.h | 1 + arch/x86/kvm/x86.c | 21 +++++++++++++++++++++ include/linux/kvm.h | 4 ++++ 5 files changed, 35 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 32eb96c7ca2..54bcf228152 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -233,4 +233,9 @@ struct kvm_guest_debug_arch { struct kvm_pit_state { struct kvm_pit_channel_state channels[3]; }; + +struct kvm_reinject_control { + __u8 pit_reinject; + __u8 reserved[31]; +}; #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 72bd275a9b5..528daadeba4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) if (!atomic_inc_and_test(&pt->pending)) set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); + if (!pt->reinject) + atomic_set(&pt->pending, 1); + if (vcpu0 && waitqueue_active(&vcpu0->wq)) wake_up_interruptible(&vcpu0->wq); @@ -580,6 +583,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); + pit_state->pit_timer.reinject = true; mutex_unlock(&pit->pit_state.lock); kvm_pit_reset(pit); diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 4178022b97a..76959c4b500 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -9,6 +9,7 @@ struct kvm_kpit_timer { s64 period; /* unit: ns */ s64 scheduled; atomic_t pending; + bool reinject; }; struct kvm_kpit_channel_state { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3fbe8c55c1..a1f14611f4b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -993,6 +993,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: + case KVM_CAP_REINJECT_CONTROL: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1728,6 +1729,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) return r; } +static int kvm_vm_ioctl_reinject(struct kvm *kvm, + struct kvm_reinject_control *control) +{ + if (!kvm->arch.vpit) + return -ENXIO; + kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + return 0; +} + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -1925,6 +1935,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_REINJECT_CONTROL: { + struct kvm_reinject_control control; + r = -EFAULT; + if (copy_from_user(&control, argp, sizeof(control))) + goto out; + r = kvm_vm_ioctl_reinject(kvm, &control); + if (r) + goto out; + r = 0; + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 11e3e6197c8..ae7a12c7742 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -396,6 +396,9 @@ struct kvm_trace_rec { #if defined(CONFIG_X86) #define KVM_CAP_SET_GUEST_DEBUG 23 #endif +#if defined(CONFIG_X86) +#define KVM_CAP_REINJECT_CONTROL 24 +#endif /* * ioctls for VM fds @@ -429,6 +432,7 @@ struct kvm_trace_rec { struct kvm_assigned_pci_dev) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) /* * ioctls for vcpu fds -- cgit v1.2.3-70-g09d2 From 1c08364c3565242f1e1bd585bc2ce458967941af Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 12:39:07 +0200 Subject: KVM: Move struct kvm_pio_request into x86 kvm_host.h This is an x86 specific stucture and has no business living in common code. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 12 ++++++++++++ include/linux/kvm_types.h | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b74576aec19..863ea73431a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -227,6 +227,18 @@ struct kvm_pv_mmu_op_buffer { char buf[512] __aligned(sizeof(long)); }; +struct kvm_pio_request { + unsigned long count; + int cur_count; + gva_t guest_gva; + int in; + int port; + int size; + int string; + int down; + int rep; +}; + /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level * 32-bit). The kvm_mmu structure abstracts the details of the current mmu diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 5f4a18cae26..2b8318c83e5 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,16 +40,4 @@ typedef unsigned long hfn_t; typedef hfn_t pfn_t; -struct kvm_pio_request { - unsigned long count; - int cur_count; - gva_t guest_gva; - int in; - int port; - int size; - int string; - int down; - int rep; -}; - #endif /* __KVM_TYPES_H__ */ -- cgit v1.2.3-70-g09d2 From 1872a3f411ffe95c8e92300e0986a3532db55ce9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 23:26:52 +0200 Subject: KVM: VMX: Fix guest state validity checks The vmx guest state validity checks are full of bugs. Make them conform to the manual. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3312047664a..be9441056ff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1784,14 +1784,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); cs_rpl = cs.selector & SELECTOR_RPL_MASK; + if (cs.unusable) + return false; if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) return false; if (!cs.s) return false; - if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { + if (cs.type & AR_TYPE_WRITEABLE_MASK) { if (cs.dpl > cs_rpl) return false; - } else if (cs.type & AR_TYPE_CODE_MASK) { + } else { if (cs.dpl != cs_rpl) return false; } @@ -1810,7 +1812,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ss_rpl = ss.selector & SELECTOR_RPL_MASK; - if ((ss.type != 3) || (ss.type != 7)) + if (ss.unusable) + return true; + if (ss.type != 3 && ss.type != 7) return false; if (!ss.s) return false; @@ -1830,6 +1834,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) vmx_get_segment(vcpu, &var, seg); rpl = var.selector & SELECTOR_RPL_MASK; + if (var.unusable) + return true; if (!var.s) return false; if (!var.present) @@ -1851,9 +1857,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); + if (tr.unusable) + return false; if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ return false; - if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ + if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ return false; if (!tr.present) return false; @@ -1867,6 +1875,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); + if (ldtr.unusable) + return true; if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ return false; if (ldtr.type != 2) -- cgit v1.2.3-70-g09d2 From 9fd4a3b7a412f983696b23121413a79d2132fed6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 23:43:42 +0200 Subject: KVM: VMX: don't clobber segment AR if emulating invalid state The ususable bit is important for determining state validity; don't clobber it. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index be9441056ff..a6598cbaa00 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1649,7 +1649,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = vmcs_read32(sf->limit); var->selector = vmcs_read16(sf->selector); ar = vmcs_read32(sf->ar_bytes); - if (ar & AR_UNUSABLE_MASK) + if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; var->type = ar & 15; var->s = (ar >> 4) & 1; -- cgit v1.2.3-70-g09d2 From 10f32d84c750ccf8c0afb3a4ea9d4059aa3e9ffc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Jan 2009 00:53:19 +0200 Subject: KVM: VMX: Prevent exit handler from running if emulating due to invalid state If we've just emulated an instruction, we won't have any valid exit reason and associated information. Fix by moving the clearing of the emulation_required flag to the exit handler. This way the exit handler can notice that we've been emulating and abort early. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a6598cbaa00..a309be6788e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3130,7 +3130,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct vcpu_vmx *vmx = to_vmx(vcpu); int err; preempt_enable(); @@ -3155,11 +3154,6 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - - /* Guest state should be valid now except if we need to - * emulate an MMIO */ - if (guest_state_valid(vcpu)) - vmx->emulation_required = 0; } /* @@ -3208,8 +3202,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* If we need to emulate an MMIO from handle_invalid_guest_state * we just return 0 */ - if (vmx->emulation_required && emulate_invalid_guest_state) + if (vmx->emulation_required && emulate_invalid_guest_state) { + if (guest_state_valid(vcpu)) + vmx->emulation_required = 0; return 0; + } /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ -- cgit v1.2.3-70-g09d2 From 350f69dcd169d536307aa4a8c38c480e3a51c0db Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Jan 2009 11:12:40 +0200 Subject: KVM: x86 emulator: Make emulate_pop() a little more generic Allow emulate_pop() to read into arbitrary memory rather than just the source operand. Needed for complicated instructions like far returns. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 54fb09889a8..94459f313f1 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) } static int emulate_pop(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) + struct x86_emulate_ops *ops, + void *dest, int len) { struct decode_cache *c = &ctxt->decode; int rc; rc = ops->read_emulated(register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]), - &c->src.val, c->src.bytes, ctxt->vcpu); + dest, len, ctxt->vcpu); if (rc != 0) return rc; - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); return rc; } @@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int rc; - c->src.bytes = c->dst.bytes; - rc = emulate_pop(ctxt, ops); + rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); if (rc != 0) return rc; - c->dst.val = c->src.val; return 0; } @@ -1467,11 +1466,9 @@ special_insn: break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - c->src.bytes = c->op_bytes; - rc = emulate_pop(ctxt, ops); + rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); if (rc != 0) goto done; - c->dst.val = c->src.val; break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) -- cgit v1.2.3-70-g09d2 From 8b3079a5c0c031de07c8390aa160a4229088274f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Jan 2009 12:10:54 +0200 Subject: KVM: VMX: When emulating on invalid vmx state, don't return to userspace unnecessarily If we aren't doing mmio there's no need to exit to userspace (which will just be confused). Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a309be6788e..df454de8acf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -91,6 +91,7 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; + enum emulation_result invalid_state_emulation_result; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; @@ -3130,7 +3131,8 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - int err; + struct vcpu_vmx *vmx = to_vmx(vcpu); + enum emulation_result err = EMULATE_DONE; preempt_enable(); local_irq_enable(); @@ -3154,6 +3156,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); + + vmx->invalid_state_emulation_result = err; } /* @@ -3205,7 +3209,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vmx->emulation_required && emulate_invalid_guest_state) { if (guest_state_valid(vcpu)) vmx->emulation_required = 0; - return 0; + return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; } /* Access CR3 don't cause VMExit in paging mode, so we need -- cgit v1.2.3-70-g09d2 From a77ab5ead5c1fef2c6c5a9b3cf3765e52643a2aa Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Jan 2009 13:27:34 +0200 Subject: KVM: x86 emulator: implement 'ret far' instruction (opcode 0xcb) Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 94459f313f1..ca91749d208 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -178,7 +178,7 @@ static u32 opcode_table[256] = { 0, ImplicitOps | Stack, 0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, /* 0xC8 - 0xCF */ - 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, /* 0xD0 - 0xD7 */ ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, @@ -1278,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, return 0; } +static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + unsigned long cs; + + rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); + if (rc) + return rc; + if (c->op_bytes == 4) + c->eip = (u32)c->eip; + rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); + if (rc) + return rc; + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); + return rc; +} + static inline int writeback(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1735,6 +1754,11 @@ special_insn: mov: c->dst.val = c->src.val; break; + case 0xcb: /* ret far */ + rc = emulate_ret_far(ctxt, ops); + if (rc) + goto done; + break; case 0xd0 ... 0xd1: /* Grp2 */ c->src.val = 1; emulate_grp2(ctxt); -- cgit v1.2.3-70-g09d2 From 269e05e48502f1cc06802e9fba90f5100dd6bb0d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Jan 2009 15:21:42 +0200 Subject: KVM: Properly lock PIT creation Otherwise, two threads can create a PIT in parallel and cause a memory leak. Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 2 -- arch/x86/kvm/x86.c | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 528daadeba4..69d1bbff3fd 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -548,9 +548,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) if (!pit) return NULL; - mutex_lock(&kvm->lock); pit->irq_source_id = kvm_request_irq_source_id(kvm); - mutex_unlock(&kvm->lock); if (pit->irq_source_id < 0) { kfree(pit); return NULL; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1f14611f4b..6fbc3460337 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1837,10 +1837,16 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; break; case KVM_CREATE_PIT: + mutex_lock(&kvm->lock); + r = -EEXIST; + if (kvm->arch.vpit) + goto create_pit_unlock; r = -ENOMEM; kvm->arch.vpit = kvm_create_pit(kvm); if (kvm->arch.vpit) r = 0; + create_pit_unlock: + mutex_unlock(&kvm->lock); break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; -- cgit v1.2.3-70-g09d2 From c8a73f186bf62235b6fb5dd52601d641917dd50b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 5 Jan 2009 16:02:47 +0100 Subject: KVM: SVM: Add microcode patch level dummy VMware ESX checks if the microcode level is correct when using a barcelona CPU, in order to see if it actually can use SVM. Let's tell it we're on the safe side... Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3a7a314d55..3a60c3f04e6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1928,6 +1928,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_VM_CR: *data = 0; break; + case MSR_IA32_UCODE_REV: + *data = 0x01000065; + break; default: return kvm_get_msr_common(vcpu, ecx, data); } -- cgit v1.2.3-70-g09d2 From 4677a3b693e035f186e2875259b9a0bb94c42fbe Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 6 Jan 2009 13:00:27 +0200 Subject: KVM: MMU: Optimize page unshadowing Using kvm_mmu_lookup_page() will result in multiple scans of the hash chains; use hlist_for_each_entry_safe() to achieve a single scan instead. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 15850809b55..aac0499947d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1471,11 +1471,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) { + unsigned index; + struct hlist_head *bucket; struct kvm_mmu_page *sp; + struct hlist_node *node, *nn; - while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { - pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); - kvm_mmu_zap_page(kvm, sp); + index = kvm_page_table_hashfn(gfn); + bucket = &kvm->arch.mmu_page_hash[index]; + hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { + if (sp->gfn == gfn && !sp->role.metaphysical + && !sp->role.invalid) { + pgprintk("%s: zap %lx %x\n", + __func__, gfn, sp->role.word); + kvm_mmu_zap_page(kvm, sp); + } } } -- cgit v1.2.3-70-g09d2 From 5d9b8e30f543a9f21a968a4cda71e8f6d1c66a61 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 18:04:18 +0200 Subject: KVM: Add CONFIG_HAVE_KVM_IRQCHIP Two KVM archs support irqchips and two don't. Add a Kconfig item to make selecting between the two models easier. Signed-off-by: Avi Kivity --- arch/ia64/kvm/Kconfig | 4 ++++ arch/powerpc/kvm/Kconfig | 3 +++ arch/s390/kvm/Kconfig | 3 +++ arch/x86/kvm/Kconfig | 4 ++++ 4 files changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index f833a0b4188..0a2d6b86075 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -4,6 +4,10 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + default y + menuconfig VIRTUALIZATION bool "Virtualization" depends on HAVE_KVM || IA64 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 14657055014..5a152a52796 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -2,6 +2,9 @@ # KVM configuration # +config HAVE_KVM_IRQCHIP + bool + menuconfig VIRTUALIZATION bool "Virtualization" ---help--- diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index e051cad1f1e..3e260b7e37b 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -4,6 +4,9 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + menuconfig VIRTUALIZATION bool "Virtualization" default y diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b81125f0bde..0a303c3ed11 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -4,6 +4,10 @@ config HAVE_KVM bool +config HAVE_KVM_IRQCHIP + bool + default y + menuconfig VIRTUALIZATION bool "Virtualization" depends on HAVE_KVM || X86 -- cgit v1.2.3-70-g09d2 From 4780c65904f0fc4e312ee2da9383eacbe04e61ea Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 4 Jan 2009 18:06:06 +0200 Subject: KVM: Reset PIT irq injection logic when the PIT IRQ is unmasked While the PIT is masked the guest cannot ack the irq, so the reinject logic will never allow the interrupt to be injected. Fix by resetting the reinjection counters on unmask. Unbreaks Xen. Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 15 +++++++++++++++ arch/x86/kvm/i8254.h | 1 + 2 files changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 69d1bbff3fd..c13bb92d315 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -539,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit) pit->pit_state.irq_ack = 1; } +static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) +{ + struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); + + if (!mask) { + atomic_set(&pit->pit_state.pit_timer.pending, 0); + pit->pit_state.irq_ack = 1; + } +} + struct kvm_pit *kvm_create_pit(struct kvm *kvm) { struct kvm_pit *pit; @@ -586,6 +596,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) kvm_pit_reset(pit); + pit->mask_notifier.func = pit_mask_notifer; + kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); + return pit; } @@ -594,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm) struct hrtimer *timer; if (kvm->arch.vpit) { + kvm_unregister_irq_mask_notifier(kvm, 0, + &kvm->arch.vpit->mask_notifier); mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 76959c4b500..6acbe4b505d 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -46,6 +46,7 @@ struct kvm_pit { struct kvm *kvm; struct kvm_kpit_state pit_state; int irq_source_id; + struct kvm_irq_mask_notifier mask_notifier; }; #define KVM_PIT_BASE_ADDRESS 0x40 -- cgit v1.2.3-70-g09d2 From ff81ff10b4417952919dcc0bd67effa9ceb411c0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 8 Jan 2009 11:05:17 -0800 Subject: KVM: SVM: Fix typo in has_svm() Signed-off-by: Joe Perches Acked-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3a60c3f04e6..db5021b2b5a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -250,7 +250,7 @@ static int has_svm(void) const char *msg; if (!cpu_has_svm(&msg)) { - printk(KERN_INFO "has_svn: %s\n", msg); + printk(KERN_INFO "has_svm: %s\n", msg); return 0; } -- cgit v1.2.3-70-g09d2 From 9903a927a4aea4b1ea42356a8453fca664df0b18 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 8 Jan 2009 17:44:19 -0200 Subject: KVM: MMU: drop zeroing on mmu_memory_cache_alloc Zeroing on mmu_memory_cache_alloc is unnecessary since: - Smaller areas are pre-allocated with kmem_cache_zalloc. - Page pointed by ->spt is overwritten with prefetch_page and entries in page pointed by ->gfns are initialized before reading. [avi: zeroing pages is unnecessary] Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aac0499947d..de9a9fbc16e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -352,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, BUG_ON(!mc->nobjs); p = mc->objects[--mc->nobjs]; - memset(p, 0, size); return p; } -- cgit v1.2.3-70-g09d2 From f6e2c02b6d28ddabe99377c5640a833407a62632 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 11 Jan 2009 13:02:10 +0200 Subject: KVM: MMU: Rename "metaphysical" attribute to "direct" This actually describes what is going on, rather than alerting the reader that something strange is going on. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 5 +++-- arch/x86/kvm/mmu.c | 32 ++++++++++++++++---------------- arch/x86/kvm/paging_tmpl.h | 12 ++++++------ 3 files changed, 25 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 863ea73431a..55fd4c5fd38 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -170,7 +170,8 @@ struct kvm_pte_chain { * bits 0:3 - total guest paging levels (2-4, or zero for real mode) * bits 4:7 - page table level for this shadow (1-4) * bits 8:9 - page table quadrant for 2-level guests - * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) + * bit 16 - direct mapping of virtual to physical mapping at gfn + * used for real mode and two-dimensional paging * bits 17:19 - common access permissions for all ptes in this shadow page */ union kvm_mmu_page_role { @@ -180,7 +181,7 @@ union kvm_mmu_page_role { unsigned level:4; unsigned quadrant:2; unsigned pad_for_nice_hex_output:6; - unsigned metaphysical:1; + unsigned direct:1; unsigned access:3; unsigned invalid:1; unsigned cr4_pge:1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index de9a9fbc16e..ef060ec444a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1066,7 +1066,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry(sp, node, bucket, hash_link) - if (sp->gfn == gfn && !sp->role.metaphysical + if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) { pgprintk("%s: found role %x\n", __func__, sp->role.word); @@ -1200,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gaddr, unsigned level, - int metaphysical, + int direct, unsigned access, u64 *parent_pte) { @@ -1213,7 +1213,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role = vcpu->arch.mmu.base_role; role.level = level; - role.metaphysical = metaphysical; + role.direct = direct; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -1250,7 +1250,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp->role = role; sp->global = role.cr4_pge; hlist_add_head(&sp->hash_link, bucket); - if (!metaphysical) { + if (!direct) { if (rmap_write_protect(vcpu->kvm, gfn)) kvm_flush_remote_tlbs(vcpu->kvm); account_shadowed(vcpu->kvm, gfn); @@ -1395,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_page_unlink_children(kvm, sp); kvm_mmu_unlink_parents(kvm, sp); kvm_flush_remote_tlbs(kvm); - if (!sp->role.invalid && !sp->role.metaphysical) + if (!sp->role.invalid && !sp->role.direct) unaccount_shadowed(kvm, sp->gfn); if (sp->unsync) kvm_unlink_unsync_page(kvm, sp); @@ -1458,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) - if (sp->gfn == gfn && !sp->role.metaphysical) { + if (sp->gfn == gfn && !sp->role.direct) { pgprintk("%s: gfn %lx role %x\n", __func__, gfn, sp->role.word); r = 1; @@ -1478,7 +1478,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { - if (sp->gfn == gfn && !sp->role.metaphysical + if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) { pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); @@ -1638,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) bucket = &vcpu->kvm->arch.mmu_page_hash[index]; /* don't unsync if pagetable is shadowed with multiple roles */ hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { - if (s->gfn != sp->gfn || s->role.metaphysical) + if (s->gfn != sp->gfn || s->role.direct) continue; if (s->role.word != sp->role.word) return 1; @@ -1951,7 +1951,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) int i; gfn_t root_gfn; struct kvm_mmu_page *sp; - int metaphysical = 0; + int direct = 0; root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; @@ -1960,18 +1960,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); if (tdp_enabled) - metaphysical = 1; + direct = 1; sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - PT64_ROOT_LEVEL, metaphysical, + PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; vcpu->arch.mmu.root_hpa = root; return; } - metaphysical = !is_paging(vcpu); + direct = !is_paging(vcpu); if (tdp_enabled) - metaphysical = 1; + direct = 1; for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -1985,7 +1985,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) } else if (vcpu->arch.mmu.root_level == 0) root_gfn = 0; sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, metaphysical, + PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; @@ -2487,7 +2487,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { - if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) + if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) continue; pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); @@ -3125,7 +3125,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) gfn_t gfn; list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { - if (sp->role.metaphysical) + if (sp->role.direct) continue; gfn = unalias_gfn(vcpu->kvm, sp->gfn); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 46b68f941f6..7314c0944c5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -277,7 +277,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, unsigned access = gw->pt_access; struct kvm_mmu_page *shadow_page; u64 spte, *sptep; - int metaphysical; + int direct; gfn_t table_gfn; int r; int level; @@ -313,17 +313,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { - metaphysical = 1; + direct = 1; if (!is_dirty_pte(gw->ptes[level - 1])) access &= ~ACC_WRITE_MASK; table_gfn = gpte_to_gfn(gw->ptes[level - 1]); } else { - metaphysical = 0; + direct = 0; table_gfn = gw->table_gfn[level - 2]; } shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, - metaphysical, access, sptep); - if (!metaphysical) { + direct, access, sptep); + if (!direct) { r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); @@ -512,7 +512,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, pt_element_t pt[256 / sizeof(pt_element_t)]; gpa_t pte_gpa; - if (sp->role.metaphysical + if (sp->role.direct || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { nonpaging_prefetch_page(vcpu, sp); return; -- cgit v1.2.3-70-g09d2 From 5a41accd3f69c6ef1d58f0c148980bae70515937 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 11 Jan 2009 17:19:35 +0200 Subject: KVM: MMU: Only enable cr4_pge role in shadow mode Two dimensional paging is only confused by it. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6fbc3460337..19ccde621cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -364,7 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; - vcpu->arch.mmu.base_role.cr4_pge = !!(cr4 & X86_CR4_PGE); + vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } -- cgit v1.2.3-70-g09d2 From 193554750441d91e127dd5066b8aebe0f769101c Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 14 Jan 2009 16:56:00 +0000 Subject: KVM: x86: Fix typos and whitespace errors Some typos, comments, whitespace errors corrected in the cpuid code Signed-off-by: Amit Shah Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19ccde621cd..141a0166e51 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1067,7 +1067,7 @@ long kvm_arch_dev_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; @@ -1165,8 +1165,8 @@ out: } static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, - struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -1185,8 +1185,8 @@ out: } static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, - struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -1195,7 +1195,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, goto out; r = -EFAULT; if (copy_to_user(entries, &vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) goto out; return 0; @@ -1205,12 +1205,12 @@ out: } static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index) + u32 index) { entry->function = function; entry->index = index; cpuid_count(entry->function, entry->index, - &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); + &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); entry->flags = 0; } @@ -1249,7 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | bit(X86_FEATURE_SVM); - /* all func 2 cpuid_count() should be called on the same cpu */ + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); do_cpuid_1_ent(entry, function, index); ++*nent; @@ -1323,7 +1323,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) + struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG; @@ -1340,7 +1340,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[0].eax; for (func = 1; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, - &nent, cpuid->nent); + &nent, cpuid->nent); r = -E2BIG; if (nent >= cpuid->nent) goto out_free; @@ -1349,10 +1349,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) do_cpuid_ent(&cpuid_entries[nent], func, 0, - &nent, cpuid->nent); + &nent, cpuid->nent); r = -EFAULT; if (copy_to_user(entries, cpuid_entries, - nent * sizeof(struct kvm_cpuid_entry2))) + nent * sizeof(struct kvm_cpuid_entry2))) goto out_free; cpuid->nent = nent; r = 0; @@ -1496,7 +1496,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; break; @@ -1509,7 +1509,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, - cpuid_arg->entries); + cpuid_arg->entries); if (r) goto out; r = -EFAULT; @@ -2864,7 +2864,7 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) return 0; if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && - !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) + !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) return 0; return 1; } @@ -2892,7 +2892,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, if (!best || e->function > best->function) best = e; } - return best; } -- cgit v1.2.3-70-g09d2 From 399ec807ddc38ecccf8c06dbde04531cbdc63e11 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 19 Nov 2008 13:58:46 +0200 Subject: KVM: Userspace controlled irq routing Currently KVM has a static routing from GSI numbers to interrupts (namely, 0-15 are mapped 1:1 to both PIC and IOAPIC, and 16:23 are mapped 1:1 to the IOAPIC). This is insufficient for several reasons: - HPET requires non 1:1 mapping for the timer interrupt - MSIs need a new method to assign interrupt numbers and dispatch them - ACPI APIC mode needs to be able to reassign the PCI LINK interrupts to the ioapics This patch implements an interrupt routing table (as a linked list, but this can be easily changed) and a userspace interface to replace the table. The routing table is initialized according to the current hardwired mapping. Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 5 ++ arch/x86/kvm/x86.c | 6 ++ include/linux/kvm.h | 33 ++++++++++ include/linux/kvm_host.h | 31 +++++++++ virt/kvm/irq_comm.c | 168 +++++++++++++++++++++++++++++++++++++++++++++-- virt/kvm/kvm_main.c | 36 ++++++++++ 6 files changed, 275 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 1477f91617a..dbf527a5734 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -919,6 +919,11 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 141a0166e51..32e3a7ec6ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1835,6 +1835,12 @@ long kvm_arch_vm_ioctl(struct file *filp, } } else goto out; + r = kvm_setup_default_irq_routing(kvm); + if (r) { + kfree(kvm->arch.vpic); + kfree(kvm->arch.vioapic); + goto out; + } break; case KVM_CREATE_PIT: mutex_lock(&kvm->lock); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 73f34806686..7a5d73a8d4f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -398,6 +398,38 @@ struct kvm_trace_rec { #endif #if defined(CONFIG_X86) #define KVM_CAP_REINJECT_CONTROL 24 +#endif +#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#define KVM_CAP_IRQ_ROUTING 25 +#endif + +#ifdef KVM_CAP_IRQ_ROUTING + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + __u32 pad[8]; + } u; +}; + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + #endif /* @@ -430,6 +462,7 @@ struct kvm_trace_rec { _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 99963f36a6d..ce285e01bd5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -107,6 +107,19 @@ struct kvm_memory_slot { int user_alloc; }; +struct kvm_kernel_irq_routing_entry { + u32 gsi; + void (*set)(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level); + union { + struct { + unsigned irqchip; + unsigned pin; + } irqchip; + }; + struct list_head link; +}; + struct kvm { struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; @@ -128,6 +141,7 @@ struct kvm { #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP + struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ struct hlist_head mask_notifier_list; #endif @@ -480,4 +494,21 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se } #endif +#ifdef CONFIG_HAVE_KVM_IRQCHIP + +#define KVM_MAX_IRQ_ROUTES 1024 + +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *entries, + unsigned nr, + unsigned flags); +void kvm_free_irq_routing(struct kvm *kvm); + +#else + +static inline void kvm_free_irq_routing(struct kvm *kvm) {} + +#endif + #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 5162a411e4d..a797fa5e642 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -24,9 +24,24 @@ #include "ioapic.h" +static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ +#ifdef CONFIG_X86 + kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); +#endif +} + +static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ + kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); +} + /* This should be called with the kvm->lock mutex held */ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { + struct kvm_kernel_irq_routing_entry *e; unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; /* Logical OR for level trig interrupt */ @@ -39,10 +54,9 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state)); -#ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state)); -#endif + list_for_each_entry(e, &kvm->irq_routing, link) + if (e->gsi == irq) + e->set(e, kvm, !!(*irq_state)); } void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) @@ -123,3 +137,149 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn->func(kimn, mask); } +static void __kvm_free_irq_routing(struct list_head *irq_routing) +{ + struct kvm_kernel_irq_routing_entry *e, *n; + + list_for_each_entry_safe(e, n, irq_routing, link) + kfree(e); +} + +void kvm_free_irq_routing(struct kvm *kvm) +{ + __kvm_free_irq_routing(&kvm->irq_routing); +} + +int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + int delta; + + e->gsi = ue->gsi; + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + delta = 0; + switch (ue->u.irqchip.irqchip) { + case KVM_IRQCHIP_PIC_MASTER: + e->set = kvm_set_pic_irq; + break; + case KVM_IRQCHIP_PIC_SLAVE: + e->set = kvm_set_pic_irq; + delta = 8; + break; + case KVM_IRQCHIP_IOAPIC: + e->set = kvm_set_ioapic_irq; + break; + default: + goto out; + } + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin + delta; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct list_head irq_list = LIST_HEAD_INIT(irq_list); + struct list_head tmp = LIST_HEAD_INIT(tmp); + struct kvm_kernel_irq_routing_entry *e = NULL; + unsigned i; + int r; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->gsi >= KVM_MAX_IRQ_ROUTES) + goto out; + if (ue->flags) + goto out; + r = -ENOMEM; + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + goto out; + r = setup_routing_entry(e, ue); + if (r) + goto out; + ++ue; + list_add(&e->link, &irq_list); + e = NULL; + } + + mutex_lock(&kvm->lock); + list_splice(&kvm->irq_routing, &tmp); + INIT_LIST_HEAD(&kvm->irq_routing); + list_splice(&irq_list, &kvm->irq_routing); + INIT_LIST_HEAD(&irq_list); + list_splice(&tmp, &irq_list); + mutex_unlock(&kvm->lock); + + r = 0; + +out: + kfree(e); + __kvm_free_irq_routing(&irq_list); + return r; +} + +#define IOAPIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } +#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) + +#ifdef CONFIG_X86 +#define SELECT_PIC(irq) \ + ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) +# define PIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) +#else +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq) +#endif + +static const struct kvm_irq_routing_entry default_routing[] = { + ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), + ROUTING_ENTRY2(2), ROUTING_ENTRY2(3), + ROUTING_ENTRY2(4), ROUTING_ENTRY2(5), + ROUTING_ENTRY2(6), ROUTING_ENTRY2(7), + ROUTING_ENTRY2(8), ROUTING_ENTRY2(9), + ROUTING_ENTRY2(10), ROUTING_ENTRY2(11), + ROUTING_ENTRY2(12), ROUTING_ENTRY2(13), + ROUTING_ENTRY2(14), ROUTING_ENTRY2(15), + ROUTING_ENTRY1(16), ROUTING_ENTRY1(17), + ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), + ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), + ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), +#ifdef CONFIG_IA64 + ROUTING_ENTRY1(24), ROUTING_ENTRY1(25), + ROUTING_ENTRY1(26), ROUTING_ENTRY1(27), + ROUTING_ENTRY1(28), ROUTING_ENTRY1(29), + ROUTING_ENTRY1(30), ROUTING_ENTRY1(31), + ROUTING_ENTRY1(32), ROUTING_ENTRY1(33), + ROUTING_ENTRY1(34), ROUTING_ENTRY1(35), + ROUTING_ENTRY1(36), ROUTING_ENTRY1(37), + ROUTING_ENTRY1(38), ROUTING_ENTRY1(39), + ROUTING_ENTRY1(40), ROUTING_ENTRY1(41), + ROUTING_ENTRY1(42), ROUTING_ENTRY1(43), + ROUTING_ENTRY1(44), ROUTING_ENTRY1(45), + ROUTING_ENTRY1(46), ROUTING_ENTRY1(47), +#endif +}; + +int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, default_routing, + ARRAY_SIZE(default_routing), 0); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 786a3ae373b..c65484b471c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -843,6 +843,7 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP + INIT_LIST_HEAD(&kvm->irq_routing); INIT_HLIST_HEAD(&kvm->mask_notifier_list); #endif @@ -926,6 +927,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET @@ -1945,6 +1947,36 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } +#endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); @@ -2012,6 +2044,10 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: return 1; +#ifdef CONFIG_HAVE_KVM_IRQCHIP + case KVM_CAP_IRQ_ROUTING: + return 1; +#endif default: break; } -- cgit v1.2.3-70-g09d2 From 91b2ae773d3b168b763237fac33f75b13d891f20 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Jan 2009 14:57:52 +0200 Subject: KVM: Avoid using CONFIG_ in userspace visible headers Kconfig symbols are not available in userspace, and are not stripped by headers-install. Avoid their use by adding #defines in to suit each architecture. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + include/linux/kvm.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 54bcf228152..dc3f6cf1170 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -15,6 +15,7 @@ #define __KVM_HAVE_DEVICE_ASSIGNMENT #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI +#define __KVM_HAVE_GUEST_DEBUG /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7a5d73a8d4f..869462ca762 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -393,13 +393,13 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 #endif -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 #endif -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif -#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#ifdef __KVM_HAVE_IOAPIC #define KVM_CAP_IRQ_ROUTING 25 #endif -- cgit v1.2.3-70-g09d2 From 44882eed2ebe7f75f8cdae5671ab1d6e0fa40dbc Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Jan 2009 15:12:38 -0200 Subject: KVM: make irq ack notifications aware of routing table IRQ ack notifications assume an identity mapping between pin->gsi, which might not be the case with, for example, HPET. Translate before acking. Signed-off-by: Marcelo Tosatti Acked-by: Gleb Natapov --- arch/x86/kvm/i8259.c | 5 +++-- arch/x86/kvm/irq.h | 2 ++ include/linux/kvm_host.h | 2 +- virt/kvm/ioapic.c | 10 +++++----- virt/kvm/irq_comm.c | 13 ++++++++++--- 5 files changed, 21 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 179dcb0103f..93160375c84 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -49,7 +49,8 @@ static void pic_unlock(struct kvm_pic *s) spin_unlock(&s->lock); while (acks) { - kvm_notify_acked_irq(kvm, __ffs(acks)); + kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)), + __ffs(acks)); acks &= acks - 1; } @@ -232,7 +233,7 @@ int kvm_pic_read_irq(struct kvm *kvm) } pic_update_irq(s); pic_unlock(s); - kvm_notify_acked_irq(kvm, irq); + kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq); return intno; } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 82579ee538d..9f593188129 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -32,6 +32,8 @@ #include "lapic.h" #define PIC_NUM_PINS 16 +#define SELECT_PIC(irq) \ + ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) struct kvm; struct kvm_vcpu; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ce285e01bd5..c03a0a9a858 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index e85a2bcd2db..1c986ac59ad 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -293,20 +293,20 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi, +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, int trigger_mode) { union ioapic_redir_entry *ent; - ent = &ioapic->redirtbl[gsi]; + ent = &ioapic->redirtbl[pin]; - kvm_notify_acked_irq(ioapic->kvm, gsi); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); if (trigger_mode == IOAPIC_LEVEL_TRIG) { ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) - ioapic_service(ioapic, gsi); + if (!ent->fields.mask && (ioapic->irr & (1 << pin))) + ioapic_service(ioapic, pin); } } diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a797fa5e642..7aa5086c862 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -59,10 +59,19 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) e->set(e, kvm, !!(*irq_state)); } -void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { + struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_ack_notifier *kian; struct hlist_node *n; + unsigned gsi = pin; + + list_for_each_entry(e, &kvm->irq_routing, link) + if (e->irqchip.irqchip == irqchip && + e->irqchip.pin == pin) { + gsi = e->gsi; + break; + } hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) if (kian->gsi == gsi) @@ -237,8 +246,6 @@ out: #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 -#define SELECT_PIC(irq) \ - ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } -- cgit v1.2.3-70-g09d2 From d20626936dd6aa783760e780dae5abb127564316 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Feb 2009 16:23:50 +0100 Subject: x86: Add EFER descriptions for FFXSR AMD k10 includes support for the FFXSR feature, which leaves out XMM registers on FXSAVE/FXSAVE when the EFER_FFXSR bit is set in EFER. The CPUID feature bit exists already, but the EFER bit is missing currently, so this patch adds it to the list of known EFER bits. Signed-off-by: Alexander Graf CC: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/msr-index.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 46e9646e7a6..f4e505f286b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -19,12 +19,14 @@ #define _EFER_LMA 10 /* Long mode active (read-only) */ #define _EFER_NX 11 /* No execute enable */ #define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) #define EFER_LMA (1<<_EFER_LMA) #define EFER_NX (1<<_EFER_NX) #define EFER_SVME (1<<_EFER_SVME) +#define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_PERFCTR0 0x000000c1 -- cgit v1.2.3-70-g09d2 From 1b2fd70c4eddef53f32639296818c0253e7ca48d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Feb 2009 16:23:51 +0100 Subject: KVM: Add FFXSR support AMD K10 CPUs implement the FFXSR feature that gets enabled using EFER. Let's check if the virtual CPU description includes that CPUID feature bit and allow enabling it then. This is required for Windows Server 2008 in Hyper-V mode. v2 adds CPUID capability exposure Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 3 +++ arch/x86/kvm/x86.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index db5021b2b5a..1c4a018bf4b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -417,6 +417,9 @@ static __init int svm_hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); + if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) + kvm_enable_efer_bits(EFER_FFXSR); + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 32e3a7ec6ad..8f83590b47d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -490,6 +490,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) return; } + if (efer & EFER_FFXSR) { + struct kvm_cpuid_entry2 *feat; + + feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { + printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); + kvm_inject_gp(vcpu, 0); + return; + } + } + if (efer & EFER_SVME) { struct kvm_cpuid_entry2 *feat; @@ -1240,6 +1251,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, #ifdef CONFIG_X86_64 bit(X86_FEATURE_LM) | #endif + bit(X86_FEATURE_FXSR_OPT) | bit(X86_FEATURE_MMXEXT) | bit(X86_FEATURE_3DNOWEXT) | bit(X86_FEATURE_3DNOW); -- cgit v1.2.3-70-g09d2 From 34c33d163fe509da8414a736c6328855f8c164e5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 8 Feb 2009 13:28:15 +0100 Subject: KVM: Drop unused evaluations from string pio handlers Looks like neither the direction nor the rep prefix are used anymore. Drop related evaluations from SVM's and VMX's I/O exit handlers. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 4 +--- arch/x86/kvm/vmx.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1c4a018bf4b..22e88a4a51c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1194,7 +1194,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ - int size, down, in, string, rep; + int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; @@ -1213,8 +1213,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) in = (io_info & SVM_IOIO_TYPE_MASK) != 0; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - rep = (io_info & SVM_IOIO_REP_MASK) != 0; - down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index df454de8acf..509b3530540 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2698,7 +2698,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { unsigned long exit_qualification; - int size, down, in, string, rep; + int size, in, string; unsigned port; ++vcpu->stat.io_exits; @@ -2714,8 +2714,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) size = (exit_qualification & 7) + 1; in = (exit_qualification & 8) != 0; - down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; skip_emulated_instruction(vcpu); -- cgit v1.2.3-70-g09d2 From 49cd7d2238e44f7ee4269481cd8a1261cc8f93a5 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 11 Feb 2009 13:50:40 +0800 Subject: KVM: VMX: Use kvm_mmu_page_fault() handle EPT violation mmio Removed duplicated code. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 509b3530540..cb27ffccf46 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3058,11 +3058,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; - enum emulation_result er; gpa_t gpa; - unsigned long hva; int gla_validity; - int r; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); @@ -3085,32 +3082,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); - if (!kvm_is_error_hva(hva)) { - r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); - if (r < 0) { - printk(KERN_ERR "EPT: Not enough memory!\n"); - return -ENOMEM; - } - return 1; - } else { - /* must be MMIO */ - er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - - if (er == EMULATE_FAIL) { - printk(KERN_ERR - "EPT: Fail to handle EPT violation vmexit!er is %d\n", - er); - printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", - (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), - (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); - printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", - (long unsigned int)exit_qualification); - return -ENOTSUPP; - } else if (er == EMULATE_DO_MMIO) - return 0; - } - return 1; + return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); } static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -- cgit v1.2.3-70-g09d2 From c807660407a695f390034e402edfe544a1d2e40c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 4 Feb 2009 17:52:04 +0100 Subject: KVM: Fix kvmclock on !constant_tsc boxes kvmclock currently falls apart on machines without constant tsc. This patch fixes it. Changes: * keep tsc frequency in a per-cpu variable. * handle kvmclock update using a new request flag, thus checking whenever we need an update each time we enter guest context. * use a cpufreq notifier to track frequency changes and force kvmclock updates. * send ipis to kick cpu out of guest context if needed to make sure the guest doesn't see stale values. Signed-off-by: Gerd Hoffmann Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 103 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/kvm_host.h | 1 + 2 files changed, 95 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f83590b47d..05d7be89b5e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -617,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info * hv_clock->tsc_to_system_mul); } +static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -627,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) if ((!vcpu->time_page)) return; - if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { - kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); - vcpu->hv_clock_tsc_khz = tsc_khz; + if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { + kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); } /* Keep irq disabled to prevent changes to the clock */ @@ -660,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static int kvm_request_guest_time_update(struct kvm_vcpu *v) +{ + struct kvm_vcpu_arch *vcpu = &v->arch; + + if (!vcpu->time_page) + return 0; + set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); + return 1; +} + static bool msr_mtrr_valid(unsigned msr) { switch (msr) { @@ -790,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) vcpu->arch.time_page = NULL; } - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); break; } default: @@ -1000,6 +1013,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -1025,9 +1039,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; - case KVM_CAP_CLOCKSOURCE: - r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); - break; default: r = 0; break; @@ -1098,7 +1109,7 @@ out: void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvm_x86_ops->vcpu_load(vcpu, cpu); - kvm_write_guest_time(vcpu); + kvm_request_guest_time_update(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -2642,9 +2653,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +static void bounce_off(void *info) +{ + /* nothing */ +} + +static unsigned int ref_freq; +static unsigned long tsc_khz_ref; + +static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i, send_ipi = 0; + + if (!ref_freq) + ref_freq = freq->old; + + if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) + return 0; + if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) + return 0; + per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->cpu != freq->cpu) + continue; + if (!kvm_request_guest_time_update(vcpu)) + continue; + if (vcpu->cpu != smp_processor_id()) + send_ipi++; + } + } + spin_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* + * We upscale the frequency. Must make the guest + * doesn't see old kvmclock values while running with + * the new frequency, otherwise we risk the guest sees + * time go backwards. + * + * In case we update the frequency for another cpu + * (which might be in guest context) send an interrupt + * to kick the cpu out of guest context. Next time + * guest context is entered kvmclock will be updated, + * so the guest will not see stale values. + */ + smp_call_function_single(freq->cpu, bounce_off, NULL, 1); + } + return 0; +} + +static struct notifier_block kvmclock_cpufreq_notifier_block = { + .notifier_call = kvmclock_cpufreq_notifier +}; + int kvm_arch_init(void *opaque) { - int r; + int r, cpu; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { @@ -2675,6 +2749,15 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + + for_each_possible_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + tsc_khz_ref = tsc_khz; + cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + return 0; out: @@ -3010,6 +3093,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) + kvm_write_guest_time(vcpu); if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) kvm_mmu_sync_roots(vcpu); if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 339eda3ca6e..18b4df8264c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -37,6 +37,7 @@ #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 +#define KVM_REQ_KVMCLOCK_UPDATE 8 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3-70-g09d2 From 6bed6b9e84bf5e6b468847a50c0751389fdd01d4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 18 Feb 2009 14:08:59 +0100 Subject: KVM: MMU: remove redundant check in mmu_set_spte The following code flow is unnecessary: if (largepage) was_rmapped = is_large_pte(*shadow_pte); else was_rmapped = 1; The is_large_pte() function will always evaluate to one here because the (largepage && !is_large_pte) case is already handled in the first if-clause. So we can remove this check and set was_rmapped to one always here. Signed-off-by: Joerg Roedel Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ef060ec444a..c90b4b2f2ab 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1791,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, pgprintk("hfn old %lx new %lx\n", spte_to_pfn(*shadow_pte), pfn); rmap_remove(vcpu->kvm, shadow_pte); - } else { - if (largepage) - was_rmapped = is_large_pte(*shadow_pte); - else - was_rmapped = 1; - } + } else + was_rmapped = 1; } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, dirty, largepage, global, gfn, pfn, speculative, true)) { -- cgit v1.2.3-70-g09d2 From 452425dbaa1974e9fc489e64a8de46a47b4c2754 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 18 Feb 2009 14:54:37 +0100 Subject: KVM: MMU: remove assertion in kvm_mmu_alloc_page The assertion no longer makes sense since we don't clear page tables on allocation; instead we clear them during prefetch. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c90b4b2f2ab..4a21b0f8491 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -802,7 +802,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&sp->oos_link); - ASSERT(is_empty_shadow_page(sp->spt)); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; sp->parent_pte = parent_pte; -- cgit v1.2.3-70-g09d2 From 4925663a079c77d95d8685228ad6675fc5639c8e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 Feb 2009 17:28:14 +0200 Subject: KVM: Report IRQ injection status to userspace. IRQ injection status is either -1 (if there was no CPU found that should except the interrupt because IRQ was masked or ioapic was misconfigured or ...) or >= 0 in that case the number indicates to how many CPUs interrupt was injected. If the value is 0 it means that the interrupt was coalesced and probably should be reinjected. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 12 ++++++++++-- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/i8259.c | 18 +++++++++++++----- arch/x86/kvm/x86.c | 13 +++++++++++-- include/linux/kvm.h | 7 ++++++- include/linux/kvm_host.h | 4 ++-- virt/kvm/ioapic.c | 23 ++++++++++++++++------- virt/kvm/ioapic.h | 2 +- virt/kvm/irq_comm.c | 41 ++++++++++++++++++++++++++++------------- 9 files changed, 88 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 9c77e3939e9..076b00d1dbf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -182,7 +182,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_IRQCHIP: case KVM_CAP_MP_STATE: - + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -927,6 +927,7 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; } break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -934,10 +935,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55fd4c5fd38..f0faf58044f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -616,7 +616,7 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, u32 error_code); -void kvm_pic_set_irq(void *opaque, int irq, int level); +int kvm_pic_set_irq(void *opaque, int irq, int level); void kvm_inject_nmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 93160375c84..b4e662e94dd 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -77,12 +77,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm) /* * set irq level. If an edge is detected, then the IRR is set to 1 */ -static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) +static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) { - int mask; + int mask, ret = 1; mask = 1 << irq; if (s->elcr & mask) /* level triggered */ if (level) { + ret = !(s->irr & mask); s->irr |= mask; s->last_irr |= mask; } else { @@ -91,11 +92,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) } else /* edge triggered */ if (level) { - if ((s->last_irr & mask) == 0) + if ((s->last_irr & mask) == 0) { + ret = !(s->irr & mask); s->irr |= mask; + } s->last_irr |= mask; } else s->last_irr &= ~mask; + + return (s->imr & mask) ? -1 : ret; } /* @@ -172,16 +177,19 @@ void kvm_pic_update_irq(struct kvm_pic *s) pic_unlock(s); } -void kvm_pic_set_irq(void *opaque, int irq, int level) +int kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; + int ret = -1; pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { - pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } pic_unlock(s); + + return ret; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d7be89b5e..e4db5be7c95 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1019,6 +1019,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: case KVM_CAP_REINJECT_CONTROL: + case KVM_CAP_IRQ_INJECT_STATUS: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1877,6 +1878,7 @@ long kvm_arch_vm_ioctl(struct file *filp, create_pit_unlock: mutex_unlock(&kvm->lock); break; + case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { struct kvm_irq_level irq_event; @@ -1884,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + __s32 status; mutex_lock(&kvm->lock); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); + status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); + if (ioctl == KVM_IRQ_LINE_STATUS) { + irq_event.status = status; + if (copy_to_user(argp, &irq_event, + sizeof irq_event)) + goto out; + } r = 0; } break; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2163b3dd36e..dd48225d182 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -48,7 +48,10 @@ struct kvm_irq_level { * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. */ - __u32 irq; + union { + __u32 irq; + __s32 status; + }; __u32 level; }; @@ -402,6 +405,7 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_IOAPIC #define KVM_CAP_IRQ_ROUTING 25 #endif +#define KVM_CAP_IRQ_INJECT_STATUS 26 #ifdef KVM_CAP_IRQ_ROUTING @@ -465,6 +469,7 @@ struct kvm_irq_routing { #define KVM_CREATE_PIT _IO(KVMIO, 0x64) #define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) #define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) +#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) #define KVM_REGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18b4df8264c..894a56e365e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -110,7 +110,7 @@ struct kvm_memory_slot { struct kvm_kernel_irq_routing_entry { u32 gsi; - void (*set)(struct kvm_kernel_irq_routing_entry *e, + int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level); union { struct { @@ -352,7 +352,7 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); -void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); +int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 1c986ac59ad..c3b99def9cb 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -83,19 +83,22 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } -static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { union ioapic_redir_entry *pent; + int injected = -1; pent = &ioapic->redirtbl[idx]; if (!pent->fields.mask) { - int injected = ioapic_deliver(ioapic, idx); + injected = ioapic_deliver(ioapic, idx); if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) pent->fields.remote_irr = 1; } if (!pent->fields.trig_mode) ioapic->irr &= ~(1 << idx); + + return injected; } static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) @@ -207,7 +210,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; u32 deliver_bitmask; struct kvm_vcpu *vcpu; - int vcpu_id, r = 0; + int vcpu_id, r = -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", @@ -247,7 +250,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) deliver_bitmask &= ~(1 << vcpu_id); vcpu = ioapic->kvm->vcpus[vcpu_id]; if (vcpu) { - r = ioapic_inj_irq(ioapic, vcpu, vector, + if (r < 0) + r = 0; + r += ioapic_inj_irq(ioapic, vcpu, vector, trig_mode, delivery_mode); } } @@ -258,8 +263,10 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) continue; deliver_bitmask &= ~(1 << vcpu_id); vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) + if (vcpu) { ioapic_inj_nmi(vcpu); + r = 1; + } else ioapic_debug("NMI to vcpu %d failed\n", vcpu->vcpu_id); @@ -273,11 +280,12 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) return r; } -void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) { u32 old_irr = ioapic->irr; u32 mask = 1 << irq; union ioapic_redir_entry entry; + int ret = 1; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; @@ -288,9 +296,10 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) ioapic->irr |= mask; if ((!entry.fields.trig_mode && old_irr != ioapic->irr) || !entry.fields.remote_irr) - ioapic_service(ioapic, irq); + ret = ioapic_service(ioapic, irq); } } + return ret; } static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 49c9581d258..a34bd5e6436 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -83,7 +83,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, unsigned long bitmap); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); -void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, u8 dest_mode); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 6bc7439eff6..be8aba79155 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -29,22 +29,24 @@ #include "ioapic.h" -static void kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) { #ifdef CONFIG_X86 - kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); + return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); +#else + return -1; #endif } -static void kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) { - kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); + return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } -static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) { int vcpu_id; struct kvm_vcpu *vcpu; @@ -88,13 +90,20 @@ static void kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, default: break; } + return 1; } -/* This should be called with the kvm->lock mutex held */ -void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) +/* This should be called with the kvm->lock mutex held + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; unsigned long *irq_state, sig_level; + int ret = -1; if (irq < KVM_IOAPIC_NUM_PINS) { irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; @@ -113,8 +122,14 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * writes to the unused one. */ list_for_each_entry(e, &kvm->irq_routing, link) - if (e->gsi == irq) - e->set(e, kvm, sig_level); + if (e->gsi == irq) { + int r = e->set(e, kvm, sig_level); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + return ret; } void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) @@ -232,7 +247,7 @@ int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->set = kvm_set_pic_irq; break; case KVM_IRQCHIP_PIC_SLAVE: - e->set = kvm_set_pic_irq; + e->set = kvm_set_pic_irq; delta = 8; break; case KVM_IRQCHIP_IOAPIC: -- cgit v1.2.3-70-g09d2 From 1fbdc7a58512a6283e10fd27108197679db95ffa Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 11 Jan 2009 22:39:44 +0100 Subject: KVM: SVM: set accessed bit for VMCB segment selectors In the segment descriptor _cache_ the accessed bit is always set (although it can be cleared in the descriptor itself). Since Intel checks for this condition on a VMENTRY, set this bit in the AMD path to enable cross vendor migration. Cc: stable@kernel.org Signed-off-by: Andre Przywara Acked-By: Amit Shah Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 22e88a4a51c..1821c207819 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -796,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; - /* - * SVM always stores 0 for the 'G' bit in the CS selector in - * the VMCB on a VMEXIT. This hurts cross-vendor migration: - * Intel's VMENTRY has a check on the 'G' bit. - */ - if (seg == VCPU_SREG_CS) + switch (seg) { + case VCPU_SREG_CS: + /* + * SVM always stores 0 for the 'G' bit in the CS selector in + * the VMCB on a VMEXIT. This hurts cross-vendor migration: + * Intel's VMENTRY has a check on the 'G' bit. + */ var->g = s->limit > 0xfffff; - - /* - * Work around a bug where the busy flag in the tr selector - * isn't exposed - */ - if (seg == VCPU_SREG_TR) + break; + case VCPU_SREG_TR: + /* + * Work around a bug where the busy flag in the tr selector + * isn't exposed + */ var->type |= 0x2; + break; + case VCPU_SREG_DS: + case VCPU_SREG_ES: + case VCPU_SREG_FS: + case VCPU_SREG_GS: + /* + * The accessed bit must always be set in the segment + * descriptor cache, although it can be cleared in the + * descriptor, the cached bit always remains at 1. Since + * Intel has a check on this, set it here to support + * cross-vendor migration. + */ + if (!var->unusable) + var->type |= 0x1; + break; + } var->unusable = !var->present; } -- cgit v1.2.3-70-g09d2 From c5bc22424021cabda862727fb3f5098b866f074d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 19 Feb 2009 12:18:56 +0100 Subject: KVM: MMU: Fix another largepage memory leak In the paging_fetch function rmap_remove is called after setting a large pte to non-present. This causes rmap_remove to not drop the reference to the large page. The result is a memory leak of that page. Cc: stable@kernel.org Signed-off-by: Joerg Roedel Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7314c0944c5..0f11792fafa 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -306,9 +306,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, continue; if (is_large_pte(*sptep)) { + rmap_remove(vcpu->kvm, sptep); set_shadow_pte(sptep, shadow_trap_nonpresent_pte); kvm_flush_remote_tlbs(vcpu->kvm); - rmap_remove(vcpu->kvm, sptep); } if (level == PT_DIRECTORY_LEVEL -- cgit v1.2.3-70-g09d2 From 401d10dee083bda281f2fdcdf654080313ba30ec Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Fri, 20 Feb 2009 22:53:37 +0530 Subject: KVM: VMX: Update necessary state when guest enters long mode setup_msrs() should be called when entering long mode to save the shadow state for the 64-bit guest state. Using vmx_set_efer() in enter_lmode() removes some duplicated code and also ensures we call setup_msrs(). We can safely pass the value of shadow_efer to vmx_set_efer() as no other bits in the efer change while enabling long mode (guest first sets EFER.LME, then sets CR0.PG which causes a vmexit where we activate long mode). With this fix, is_long_mode() can check for EFER.LMA set instead of EFER.LME and 5e23049e86dd298b72e206b420513dbc3a240cd9 can be reverted. Signed-off-by: Amit Shah Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 54 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cb27ffccf46..48063a0aa24 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1430,6 +1430,29 @@ continue_rmode: init_rmode(vcpu->kvm); } +static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + + vcpu->arch.shadow_efer = efer; + if (!msr) + return; + if (efer & EFER_LMA) { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) | + VM_ENTRY_IA32E_MODE); + msr->data = efer; + } else { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) & + ~VM_ENTRY_IA32E_MODE); + + msr->data = efer & ~EFER_LME; + } + setup_msrs(vmx); +} + #ifdef CONFIG_X86_64 static void enter_lmode(struct kvm_vcpu *vcpu) @@ -1444,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); } - vcpu->arch.shadow_efer |= EFER_LMA; - - find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME; - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) - | VM_ENTRY_IA32E_MODE); + vmx_set_efer(vcpu, vcpu->arch.shadow_efer); } static void exit_lmode(struct kvm_vcpu *vcpu) @@ -1609,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(GUEST_CR4, hw_cr4); } -static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); - - vcpu->arch.shadow_efer = efer; - if (!msr) - return; - if (efer & EFER_LMA) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) | - VM_ENTRY_IA32E_MODE); - msr->data = efer; - - } else { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) & - ~VM_ENTRY_IA32E_MODE); - - msr->data = efer & ~EFER_LME; - } - setup_msrs(vmx); -} - static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; -- cgit v1.2.3-70-g09d2 From 41d6af119206e98764b4ae6d264d63acefcf851e Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Thu, 28 Feb 2008 16:06:15 +0530 Subject: KVM: is_long_mode() should check for EFER.LMA is_long_mode currently checks the LongModeEnable bit in EFER instead of the LongModeActive bit. This is wrong, but we survived this till now since it wasn't triggered. This breaks guests that go from long mode to compatibility mode. This is noticed on a solaris guest and fixes bug #1842160 Signed-off-by: Amit Shah Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 258e5d56298..eaab2145f62 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) static inline int is_long_mode(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - return vcpu->arch.shadow_efer & EFER_LME; + return vcpu->arch.shadow_efer & EFER_LMA; #else return 0; #endif -- cgit v1.2.3-70-g09d2 From d7364a29b305862cbc92b3f8b6ad9968270d068c Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sat, 21 Feb 2009 02:18:13 +0100 Subject: KVM: fix sparse warnings: context imbalance Impact: Attribute function with __acquires(...) resp. __releases(...). Fix this sparse warnings: arch/x86/kvm/i8259.c:34:13: warning: context imbalance in 'pic_lock' - wrong count at exit arch/x86/kvm/i8259.c:39:13: warning: context imbalance in 'pic_unlock' - unexpected unlock Signed-off-by: Hannes Eder Signed-off-by: Avi Kivity --- arch/x86/kvm/i8259.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index b4e662e94dd..1ccb50c74f1 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -32,11 +32,13 @@ #include static void pic_lock(struct kvm_pic *s) + __acquires(&s->lock) { spin_lock(&s->lock); } static void pic_unlock(struct kvm_pic *s) + __releases(&s->lock) { struct kvm *kvm = s->kvm; unsigned acks = s->pending_acks; -- cgit v1.2.3-70-g09d2 From cded19f396bc3c9d299331709d0a9fbc6ecc148a Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sat, 21 Feb 2009 02:19:13 +0100 Subject: KVM: fix sparse warnings: Should it be static? Impact: Make symbols static. Fix this sparse warnings: arch/x86/kvm/mmu.c:992:5: warning: symbol 'mmu_pages_add' was not declared. Should it be static? arch/x86/kvm/mmu.c:1124:5: warning: symbol 'mmu_pages_next' was not declared. Should it be static? arch/x86/kvm/mmu.c:1144:6: warning: symbol 'mmu_pages_clear_parents' was not declared. Should it be static? arch/x86/kvm/x86.c:2037:5: warning: symbol 'kvm_read_guest_virt' was not declared. Should it be static? arch/x86/kvm/x86.c:2067:5: warning: symbol 'kvm_write_guest_virt' was not declared. Should it be static? virt/kvm/irq_comm.c:220:5: warning: symbol 'setup_routing_entry' was not declared. Should it be static? Signed-off-by: Hannes Eder Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 11 ++++++----- arch/x86/kvm/x86.c | 8 ++++---- virt/kvm/irq_comm.c | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4a21b0f8491..2a36f7f7c4c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -989,8 +989,8 @@ struct kvm_mmu_pages { idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, - int idx) +static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, + int idx) { int i; @@ -1121,8 +1121,9 @@ struct mmu_page_path { i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ i = mmu_pages_next(&pvec, &parents, i)) -int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, - int i) +static int mmu_pages_next(struct kvm_mmu_pages *pvec, + struct mmu_page_path *parents, + int i) { int n; @@ -1141,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, return n; } -void mmu_pages_clear_parents(struct mmu_page_path *parents) +static void mmu_pages_clear_parents(struct mmu_page_path *parents) { struct kvm_mmu_page *sp; unsigned int level = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4db5be7c95..8ca100a9eca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2043,8 +2043,8 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return dev; } -int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu) +static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; @@ -2073,8 +2073,8 @@ out: return r; } -int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu) +static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu) { void *data = val; int r = X86EMUL_CONTINUE; diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a70d805e014..864ac5483ba 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -233,8 +233,8 @@ void kvm_free_irq_routing(struct kvm *kvm) __kvm_free_irq_routing(&kvm->irq_routing); } -int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) +static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; -- cgit v1.2.3-70-g09d2 From 4539b35881ae9664b0e2953438dd83f5ee02c0b4 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 12 Mar 2009 18:18:43 +0100 Subject: KVM: Fix missing smp tlb flush in invlpg When kvm emulates an invlpg instruction, it can drop a shadow pte, but leaves the guest tlbs intact. This can cause memory corruption when swapping out. Without this the other cpu can still write to a freed host physical page. tlb smp flush must happen if rmap_remove is called always before mmu_lock is released because the VM will take the mmu_lock before it can finally add the page to the freelist after swapout. mmu notifier makes it safe to flush the tlb after freeing the page (otherwise it would never be safe) so we can do a single flush for multiple sptes invalidated. Cc: stable@kernel.org Signed-off-by: Andrea Arcangeli Acked-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0f11792fafa..6bd70206c56 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -446,6 +446,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) gpa_t pte_gpa = -1; int level; u64 *sptep; + int need_flush = 0; spin_lock(&vcpu->kvm->mmu_lock); @@ -465,6 +466,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) rmap_remove(vcpu->kvm, sptep); if (is_large_pte(*sptep)) --vcpu->kvm->stat.lpages; + need_flush = 1; } set_shadow_pte(sptep, shadow_trap_nonpresent_pte); break; @@ -474,6 +476,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) break; } + if (need_flush) + kvm_flush_remote_tlbs(vcpu->kvm); spin_unlock(&vcpu->kvm->mmu_lock); if (pte_gpa == -1) -- cgit v1.2.3-70-g09d2 From 16175a796d061833aacfbd9672235f2d2725df65 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 23 Mar 2009 22:13:44 +0200 Subject: KVM: VMX: Don't allow uninhibited access to EFER on i386 vmx_set_msr() does not allow i386 guests to touch EFER, but they can still do so through the default: label in the switch. If they set EFER_LME, they can oops the host. Fix by having EFER access through the normal channel (which will check for EFER_LME) even on i386. Reported-and-tested-by: Benjamin Gilbert Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 48063a0aa24..bb481330716 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -936,11 +936,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) int ret = 0; switch (msr_index) { -#ifdef CONFIG_X86_64 case MSR_EFER: vmx_load_host_state(vmx); ret = kvm_set_msr_common(vcpu, msr_index, data); break; +#ifdef CONFIG_X86_64 case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; -- cgit v1.2.3-70-g09d2 From fc2869f6a1993550c2765e934b117e993782db30 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Mar 2009 16:37:48 +0100 Subject: x86: disable __do_IRQ support Impact: disable unused code x86 is fully converted to flow handlers. No need to keep the deprecated __do_IRQ() support active. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc2fbadff9f..3a330a437c6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -165,6 +165,9 @@ config GENERIC_HARDIRQS bool default y +config GENERIC_HARDIRQS_NO__DO_IRQ + def_bool y + config GENERIC_IRQ_PROBE bool default y -- cgit v1.2.3-70-g09d2 From 17d140402e6f0fd5dde2fdf8d045e3f95f865663 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:50 +0300 Subject: x86: headers cleanup - setup.h Impact: cleanup 'make headers_check' warn us about leaking of kernel private (mostly compile time vars) data to userspace in headers. Fix it. Guard this one by __KERNEL__. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/setup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 5a3a1371575..c2308f5250f 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SETUP_H #define _ASM_X86_SETUP_H +#ifdef __KERNEL__ + #define COMMAND_LINE_SIZE 2048 #ifndef __ASSEMBLY__ @@ -33,8 +35,6 @@ struct x86_quirks { #endif /* __ASSEMBLY__ */ -#ifdef __KERNEL__ - #ifdef __i386__ #include -- cgit v1.2.3-70-g09d2